Univariate TS Models

1. ACF & PACF Plots

Code
# ACF/PACF plots (raw and first-differenced) for daily vaccinations per
# million. A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
d_vacc_acf <- style_corr(ggAcf(d_vacc_ts),
                         "ACF Plot for Daily Vaccinations per Million")
d_vacc_acf1 <- style_corr(ggAcf(diff(d_vacc_ts)),
                          "ACF Plot for Differenced Daily Vaccinations per Million")
d_vacc_pacf <- style_corr(ggPacf(d_vacc_ts),
                          "PACF Plot for Daily Vaccinations per Million")
d_vacc_pacf1 <- style_corr(ggPacf(diff(d_vacc_ts)),
                           "PACF Plot for Differenced Daily Vaccinations per Million")
grid.arrange(d_vacc_acf, d_vacc_pacf, nrow = 2)

Code
grid.arrange(d_vacc_acf1, d_vacc_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for people vaccinated per
# hundred. A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
p_vacc_acf <- style_corr(ggAcf(p_vacc_ts),
                         "ACF Plot for People Vaccinated per Hundred")
p_vacc_acf1 <- style_corr(ggAcf(diff(p_vacc_ts)),
                          "ACF Plot for Differenced People Vaccinated per Hundred")
p_vacc_pacf <- style_corr(ggPacf(p_vacc_ts),
                          "PACF Plot for People Vaccinated per Hundred")
p_vacc_pacf1 <- style_corr(ggPacf(diff(p_vacc_ts)),
                           "PACF Plot for Differenced People Vaccinated per Hundred")
grid.arrange(p_vacc_acf, p_vacc_pacf, nrow = 2)

Code
grid.arrange(p_vacc_acf1, p_vacc_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for people fully vaccinated per
# hundred. A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
pf_vacc_acf <- style_corr(ggAcf(pf_vacc_ts),
                          "ACF Plot for People Fully Vaccinated per Hundred")
pf_vacc_acf1 <- style_corr(ggAcf(diff(pf_vacc_ts)),
                           "ACF Plot for Differenced People Fully Vaccinated per Hundred")
pf_vacc_pacf <- style_corr(ggPacf(pf_vacc_ts),
                           "PACF Plot for People Fully Vaccinated per Hundred")
pf_vacc_pacf1 <- style_corr(ggPacf(diff(pf_vacc_ts)),
                            "PACF Plot for Differenced People Fully Vaccinated per Hundred")
grid.arrange(pf_vacc_acf, pf_vacc_pacf, nrow = 2)

Code
grid.arrange(pf_vacc_acf1, pf_vacc_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for newly confirmed cases.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
case_acf <- style_corr(ggAcf(case_ts),
                       "ACF Plot for Newly Confirmed Cases")
case_acf1 <- style_corr(ggAcf(diff(case_ts)),
                        "ACF Plot for Differenced Newly Confirmed Cases")
case_pacf <- style_corr(ggPacf(case_ts),
                        "PACF Plot for Newly Confirmed Cases")
case_pacf1 <- style_corr(ggPacf(diff(case_ts)),
                         "PACF Plot for Differenced Newly Confirmed Cases")
grid.arrange(case_acf, case_pacf, nrow = 2)

Code
grid.arrange(case_acf1, case_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for dead cases.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
dead_acf <- style_corr(ggAcf(dead_ts),
                       "ACF Plot for Dead Cases")
dead_acf1 <- style_corr(ggAcf(diff(dead_ts)),
                        "ACF Plot for Differenced Dead Cases")
dead_pacf <- style_corr(ggPacf(dead_ts),
                        "PACF Plot for Dead Cases")
dead_pacf1 <- style_corr(ggPacf(diff(dead_ts)),
                         "PACF Plot for Differenced Dead Cases")
grid.arrange(dead_acf, dead_pacf, nrow = 2)

Code
grid.arrange(dead_acf1, dead_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for number of inpatient beds.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
hos1_acf <- style_corr(ggAcf(hos_ts1),
                       "ACF Plot for Number of Inpatient Beds")
hos1_acf1 <- style_corr(ggAcf(diff(hos_ts1)),
                        "ACF Plot for Differenced Number of Inpatient Beds")
hos1_pacf <- style_corr(ggPacf(hos_ts1),
                        "PACF Plot for Number of Inpatient Beds")
hos1_pacf1 <- style_corr(ggPacf(diff(hos_ts1)),
                         "PACF Plot for Differenced Number of Inpatient Beds")
grid.arrange(hos1_acf, hos1_pacf, nrow = 2)

Code
grid.arrange(hos1_acf1, hos1_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for inpatient beds used for
# COVID. A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
hos2_acf <- style_corr(ggAcf(hos_ts2),
                       "ACF Plot for Number of Inpatient Beds Used for COVID")
hos2_acf1 <- style_corr(ggAcf(diff(hos_ts2)),
                        "ACF Plot for Differenced Number of Inpatient Beds Used for COVID")
hos2_pacf <- style_corr(ggPacf(hos_ts2),
                        "PACF Plot for Number of Inpatient Beds Used for COVID")
hos2_pacf1 <- style_corr(ggPacf(diff(hos_ts2)),
                         "PACF Plot for Differenced Number of Inpatient Beds Used for COVID")
grid.arrange(hos2_acf, hos2_pacf, nrow = 2)

Code
grid.arrange(hos2_acf1, hos2_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for the utilization rate of
# inpatient beds for COVID. A shared styling helper replaces four copies of
# the same theme; "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
hos3_acf <- style_corr(ggAcf(hos_ts3),
                       "ACF Plot for Utilization Rate of Inpatient Beds for COVID")
hos3_acf1 <- style_corr(ggAcf(diff(hos_ts3)),
                        "ACF Plot for Differenced Utilization Rate of Inpatient Beds for COVID")
hos3_pacf <- style_corr(ggPacf(hos_ts3),
                        "PACF Plot for Utilization Rate of Inpatient Beds for COVID")
hos3_pacf1 <- style_corr(ggPacf(diff(hos_ts3)),
                         "PACF Plot for Differenced Utilization Rate of Inpatient Beds for COVID")
grid.arrange(hos3_acf, hos3_pacf, nrow = 2)

Code
grid.arrange(hos3_acf1, hos3_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for the unemployment rate.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
emp_acf <- style_corr(ggAcf(unemploy_ts),
                      "ACF Plot for Unemployment Rate")
emp_acf1 <- style_corr(ggAcf(diff(unemploy_ts)),
                       "ACF Plot for Differenced Unemployment Rate")
emp_pacf <- style_corr(ggPacf(unemploy_ts),
                       "PACF Plot for Unemployment Rate")
emp_pacf1 <- style_corr(ggPacf(diff(unemploy_ts)),
                        "PACF Plot for Differenced Unemployment Rate")
grid.arrange(emp_acf, emp_pacf, nrow = 2)

Code
grid.arrange(emp_acf1, emp_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for the Pfizer stock price.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
stock_acf <- style_corr(ggAcf(stock_ts),
                        "ACF Plot for Pfizer Stock Price")
stock_acf1 <- style_corr(ggAcf(diff(stock_ts)),
                         "ACF Plot for Differenced Pfizer Stock Price")
stock_pacf <- style_corr(ggPacf(stock_ts),
                         "PACF Plot for Pfizer Stock Price")
stock_pacf1 <- style_corr(ggPacf(diff(stock_ts)),
                          "PACF Plot for Differenced Pfizer Stock Price")
grid.arrange(stock_acf, stock_pacf, nrow = 2)

Code
grid.arrange(stock_acf1, stock_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for Democratic support rate.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
demo_acf <- style_corr(ggAcf(demo_ts),
                       "ACF Plot for Support Rate for Democratic")
demo_acf1 <- style_corr(ggAcf(diff(demo_ts)),
                        "ACF Plot for Differenced Support Rate for Democratic")
demo_pacf <- style_corr(ggPacf(demo_ts),
                        "PACF Plot for Support Rate for Democratic")
demo_pacf1 <- style_corr(ggPacf(diff(demo_ts)),
                         "PACF Plot for Differenced Support Rate for Democratic")
grid.arrange(demo_acf, demo_pacf, nrow = 2)

Code
grid.arrange(demo_acf1, demo_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for Independent support rate.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
inde_acf <- style_corr(ggAcf(inde_ts),
                       "ACF Plot for Support Rate for Independent")
inde_acf1 <- style_corr(ggAcf(diff(inde_ts)),
                        "ACF Plot for Differenced Support Rate for Independent")
inde_pacf <- style_corr(ggPacf(inde_ts),
                        "PACF Plot for Support Rate for Independent")
inde_pacf1 <- style_corr(ggPacf(diff(inde_ts)),
                         "PACF Plot for Differenced Support Rate for Independent")
grid.arrange(inde_acf, inde_pacf, nrow = 2)

Code
grid.arrange(inde_acf1, inde_pacf1, nrow=2)

Code
# ACF/PACF plots (raw and first-differenced) for Republican support rate.
# A shared styling helper replaces four copies of the same theme;
# "Differented" typo in titles corrected to "Differenced".
style_corr <- function(plot, title) {
  plot + ggtitle(title) + theme_bw() +
    geom_segment(lineend = "butt", color = "#5a3196") +
    geom_hline(yintercept = 0, color = "#5a3196")
}
rep_acf <- style_corr(ggAcf(rep_ts),
                      "ACF Plot for Support Rate for Republican")
rep_acf1 <- style_corr(ggAcf(diff(rep_ts)),
                       "ACF Plot for Differenced Support Rate for Republican")
rep_pacf <- style_corr(ggPacf(rep_ts),
                       "PACF Plot for Support Rate for Republican")
rep_pacf1 <- style_corr(ggPacf(diff(rep_ts)),
                        "PACF Plot for Differenced Support Rate for Republican")
grid.arrange(rep_acf, rep_pacf, nrow = 2)

Code
grid.arrange(rep_acf1, rep_pacf1, nrow=2)

Number of Daily Vaccinations Per Million: The ACF plot has significant lags at 1 and 2, so q = 1, 2. The PACF plot has significant lags at 1 and 2, so p = 1, 2. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Number of People Vaccinated Per Hundred: The ACF plot has significant lags at 1-3, so q = 1, 2, 3. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Number of People Fully Vaccinated Per Hundred: The ACF plot has significant lags at 1-3, so q = 1, 2, 3. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Number of Newly Confirmed Cases: The ACF plot has significant lags at 1-10, so q = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10. However, in general we care about only the first couple of lags, in this case the first 3. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Number of Death Cases: The ACF plot has significant lags at 1-10, so q = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10. However, in general we care about only the first couple of lags, in this case the first 3. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Number of Inpatient Beds: The ACF plot has significant lags at 1 and 2, so q = 1, 2. The PACF plot has significant lags at 1 and 4, so p = 1, 4. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Number of Inpatient Beds Used for COVID: The ACF plot has significant lags at 1 and 2, so q = 1, 2. The PACF plot has significant lags at 1 and 2, so p = 1, 2. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold at multiple lags points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Utilization Rate for Inpatient Beds Used for COVID: The ACF plot has a significant lag at 1, so q = 1. The PACF plot has significant lags at 1-3, so p = 1, 2, 3. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Unemployment Rate: The ACF plot has a significant lag at 1, so q = 1. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Autocorrelation above this threshold points toward non-stationarity: the series shows persistent, predictable dependence over time. Achieving stationarity, for example by differencing, is crucial for the effective modeling and forecasting of the series.

Pfizer Stock Price: The ACF and PACF plots provide critical insights for determining the parameters of our time series model. The ACF plot shows significant lags at 1-10, suggesting candidate MA orders q of 1 through 10. The PACF plot shows a significant lag at 1, indicating an AR order p of 1. Additionally, autocorrelation values above the threshold across many lags point toward non-stationarity, so the series likely requires differencing before effective modeling and forecasting.

Support Rate for Democratic: There are no lags over the dashed line in the ACF plot, which indicates that there is no significant autocorrelation in the series beyond the lag indicated by the highest peak. In this case, the ACF plot suggests that there is no systematic relationship between the observations at different time points. This lack of autocorrelation implies that the series is likely stationary, as there is no discernible pattern of dependence between consecutive observations.

Support Rate for Independent: There are no lags over the dashed line in the ACF plot, which indicates that there is no significant autocorrelation in the series beyond the lag indicated by the highest peak. In this case, the ACF plot suggests that there is no systematic relationship between the observations at different time points. This lack of autocorrelation implies that the series is likely stationary, as there is no discernible pattern of dependence between consecutive observations.

Support Rate for Republican: The ACF and PACF plots provide critical insights for determining the parameters of our time series model. The ACF plot shows a significant lag at 1, suggesting an MA order q of 1. The PACF plot shows a significant lag at 1, indicating an AR order p of 1. The presence of autocorrelation above the threshold suggests the series may not be fully stationary and may require differencing before effective modeling and forecasting.

2. Dickey-Fuller Test

Code
tseries::adf.test(d_vacc_ts)

    Augmented Dickey-Fuller Test

data:  d_vacc_ts
Dickey-Fuller = -4.1125, Lag order = 3, p-value = 0.0183
alternative hypothesis: stationary
Code
# Drop NA values first, then run the ADF test on the people-vaccinated series.
p_vacc_ts1 <- na.omit(p_vacc_ts)
tseries::adf.test(p_vacc_ts1)

    Augmented Dickey-Fuller Test

data:  p_vacc_ts1
Dickey-Fuller = -3.3701, Lag order = 3, p-value = 0.08067
alternative hypothesis: stationary
Code
# Drop NA values first, then run the ADF test on the fully-vaccinated series.
pf_vacc_ts1 <- na.omit(pf_vacc_ts)
tseries::adf.test(pf_vacc_ts1)

    Augmented Dickey-Fuller Test

data:  pf_vacc_ts1
Dickey-Fuller = -7.4927, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
Code
tseries::adf.test(case_ts)

    Augmented Dickey-Fuller Test

data:  case_ts
Dickey-Fuller = -1.8178, Lag order = 3, p-value = 0.6457
alternative hypothesis: stationary
Code
tseries::adf.test(dead_ts)

    Augmented Dickey-Fuller Test

data:  dead_ts
Dickey-Fuller = -0.54605, Lag order = 3, p-value = 0.9754
alternative hypothesis: stationary
Code
tseries::adf.test(hos_ts1)

    Augmented Dickey-Fuller Test

data:  hos_ts1
Dickey-Fuller = -12.058, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
Code
tseries::adf.test(hos_ts2)

    Augmented Dickey-Fuller Test

data:  hos_ts2
Dickey-Fuller = -2.9428, Lag order = 3, p-value = 0.1968
alternative hypothesis: stationary
Code
tseries::adf.test(hos_ts3)

    Augmented Dickey-Fuller Test

data:  hos_ts3
Dickey-Fuller = -2.8936, Lag order = 3, p-value = 0.2165
alternative hypothesis: stationary
Code
tseries::adf.test(unemploy_ts)

    Augmented Dickey-Fuller Test

data:  unemploy_ts
Dickey-Fuller = -8.8759, Lag order = 2, p-value = 0.01
alternative hypothesis: stationary
Code
tseries::adf.test(stock_ts)

    Augmented Dickey-Fuller Test

data:  stock_ts
Dickey-Fuller = -3.0918, Lag order = 3, p-value = 0.1381
alternative hypothesis: stationary
Code
tseries::adf.test(demo_ts)

    Augmented Dickey-Fuller Test

data:  demo_ts
Dickey-Fuller = -3.4688, Lag order = 3, p-value = 0.05607
alternative hypothesis: stationary
Code
tseries::adf.test(inde_ts)

    Augmented Dickey-Fuller Test

data:  inde_ts
Dickey-Fuller = -4.2027, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
Code
tseries::adf.test(rep_ts)

    Augmented Dickey-Fuller Test

data:  rep_ts
Dickey-Fuller = -3.4937, Lag order = 3, p-value = 0.05222
alternative hypothesis: stationary

In our project, we delve into an array of statistical series to discern patterns and ascertain stationarity, crucial for understanding the interplay between COVID-19 dynamics, vaccination progress, Pfizer's stock price, and broader socio-economic and political indicators. Our methodology employs rigorous statistical tests, complemented by Autocorrelation Function (ACF) plots, to scrutinize the data’s behavior over time.

Number of Daily Vaccinations Per Million: A p-value below 0.05 signals sufficient grounds to reject the null hypothesis at a 5% significance level, indicating stationarity in our series. This finding, however, contrasts with prior conclusions, suggesting the ACF plot’s superior accuracy, which points toward non-stationarity.

Number of People Vaccinated Per Hundred: The p-value, exceeding 0.05, reveals an insufficient basis to reject the null hypothesis, indicating a non-stationary series. This necessitates further modifications for stationarity, reinforcing conclusions from earlier analyses, including a significant lag order of 3.

Number of People Fully Vaccinated Per Hundred: With a p-value below 0.05, we find adequate evidence to reject the null hypothesis, suggesting stationarity. Yet, this contradicts previous findings, with the ACF plot indicating non-stationarity, challenging our initial conclusion.

Number of Newly Confirmed Cases: A p-value above 0.05 indicates a lack of sufficient evidence to dismiss the null hypothesis, suggesting non-stationarity. This aligns with earlier observations, necessitating adjustments for stationarity, including a noted lag order of 3.

Number of Death Cases: The p-value, again above 0.05, underscores a lack of adequate evidence to reject the null hypothesis, signaling a non-stationary series and the need for further data adjustments. This finding is consistent with prior analyses.

Number of Inpatient Beds: Here, a p-value below 0.05 provides enough justification to reject the null hypothesis, suggesting a stationary series. Nevertheless, this result is at odds with previous analyses, indicating non-stationarity based on the ACF plot.

Number of Inpatient Beds Used for COVID: The p-value surpassing 0.05 suggests insufficient evidence to reject the null hypothesis, pointing to a non-stationary series that requires adjustments, corroborating earlier findings and the significance of a lag order of 3.

Utilization Rate for Inpatient Beds Used for COVID: A high p-value indicates the series’ non-stationarity, echoing the need for adjustments to achieve stationarity and supporting earlier conclusions, including a lag order of 3.

Unemployment Rate: A low p-value indicates sufficient evidence to reject the null hypothesis, suggesting stationarity. However, this contrasts with previous examples, with the ACF plot indicating non-stationarity.

Pfizer Stock Price: With a p-value exceeding 0.05, there’s insufficient evidence to reject the null hypothesis, indicating a non-stationary series requiring adjustments, consistent with earlier findings, including a lag order of 3.

Support Rate for Democratic: A high p-value reveals a lack of evidence to reject the null hypothesis, suggesting non-stationarity and the need for adjustments, contradicting earlier conclusions of stationarity.

Support Rate for Independent: A low p-value provides ample evidence to reject the null hypothesis, indicating a stationary series. This finding aligns with prior conclusions, affirming the series’ stationarity.

Support Rate for Republican: The p-value, exceeding 0.05, indicates insufficient evidence to reject the null hypothesis, suggesting a non-stationary series that necessitates adjustments, in line with earlier analyses.

Through this detailed exploration, we meticulously gauge the stationarity of diverse series, juxtaposing statistical test results against ACF plot insights to draw nuanced conclusions on the dynamic interplay between sentiment, stock price movements, and broader socio-economic indicators.

3. Detrend VS Difference

Code
# Compare two routes to stationarity for daily vaccinations: residuals of a
# linear fit on time (detrending) vs. first differences.
fit_d_vacc <- lm(d_vacc_ts ~ time(d_vacc_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_d_vacc), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(d_vacc_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# NA-cleaned people-vaccinated series.
fit_p_vacc <- lm(p_vacc_ts1 ~ time(p_vacc_ts1), na.action = NULL)
plot1 <- autoplot(resid(fit_p_vacc), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(p_vacc_ts1), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# NA-cleaned fully-vaccinated series.
fit_pf_vacc <- lm(pf_vacc_ts1 ~ time(pf_vacc_ts1), na.action = NULL)
plot1 <- autoplot(resid(fit_pf_vacc), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(pf_vacc_ts1), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# newly-confirmed-cases series.
fit_case <- lm(case_ts ~ time(case_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_case), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(case_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# dead-cases series.
fit_dead <- lm(dead_ts ~ time(dead_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_dead), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(dead_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# inpatient-beds series.
fit_hos1 <- lm(hos_ts1 ~ time(hos_ts1), na.action = NULL)
plot1 <- autoplot(resid(fit_hos1), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(hos_ts1), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# COVID inpatient-beds series.
fit_hos2 <- lm(hos_ts2 ~ time(hos_ts2), na.action = NULL)
plot1 <- autoplot(resid(fit_hos2), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(hos_ts2), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# bed-utilization-rate series.
fit_hos3 <- lm(hos_ts3 ~ time(hos_ts3), na.action = NULL)
plot1 <- autoplot(resid(fit_hos3), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(hos_ts3), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# unemployment-rate series.
fit_emp <- lm(unemploy_ts ~ time(unemploy_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_emp), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(unemploy_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# Pfizer stock-price series.
fit_stock <- lm(stock_ts ~ time(stock_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_stock), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(stock_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# Democratic-support series.
fit_demo <- lm(demo_ts ~ time(demo_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_demo), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(demo_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# Independent-support series.
fit_inde <- lm(inde_ts ~ time(inde_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_inde), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(inde_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Code
# Compare detrending (linear-fit residuals) vs. first differencing for the
# Republican-support series.
fit_rep <- lm(rep_ts ~ time(rep_ts), na.action = NULL)
plot1 <- autoplot(resid(fit_rep), main = "Detrended", colour = "#5a3196") + theme_bw()
plot2 <- autoplot(diff(rep_ts), main = "First Difference", colour = "#5a3196") + theme_bw()
grid.arrange(plot1, plot2, nrow = 2)

Detrending and differencing stand as pivotal techniques in the realm of time series analysis, each aimed at achieving the crucial condition of stationarity within a dataset. While navigating the same goal of trend elimination, these methodologies diverge in their approach and application nuances.

Detrending is a targeted process aimed squarely at eradicating the underlying trend from the dataset. This is accomplished by first meticulously estimating the trend component that permeates the time series and then subtracting this estimated trend from the original dataset. The outcome is a transformed series where the original mean has been adjusted to center around zero, effectively neutralizing the trend influence. However, this transformation is not a panacea; detrended data can still exhibit non-stationary characteristics, such as seasonality or variance instabilities, that require further intervention.

Conversely, differencing operates under a broader scope, addressing stationarity by focusing on the differences between consecutive observations. This method is encapsulated by the formula:

\[\Delta y_t = y_t - y_{t-1}\]

where \(\Delta y_t\) represents the difference between the current observation \(y_t\) and its predecessor \(y_{t-1}\). Through this simple yet effective mechanism, differencing excels at mitigating linear trends and highlighting the dynamic changes between data points. Its strength lies particularly in contexts where the time series displays a consistent directional trend, making it a robust choice for such scenarios.

However, it’s worth noting that while differencing is adept at ironing out linear trends, it may falter when faced with nonlinear trends or pronounced seasonal fluctuations. The essence of differencing lies in its ability to simplify the series to a form where patterns and structures become more discernible, albeit at the potential cost of oversimplification in certain complex scenarios.

The decision to employ detrending or differencing hinges on a thorough examination of the time series at hand. The specific characteristics of the dataset, including the nature of its trends and seasonalities, dictate the most appropriate method for achieving stationarity. This choice is not merely technical but strategic, laying the foundation for deeper insights and more accurate forecasts in the pursuit of time series analysis.

4. ARIMA(p,d,q)

In this section, our aim is to identify all potential values for the autoregressive (AR) parameter (p), the moving average (MA) parameter (q), and the differencing parameter (d) based on the autocorrelation function (ACF) and partial autocorrelation function (PACF) plots of the original data.

To determine the value of p, we examine the most significant lags from the PACF plot, which helps us identify the lag orders where the correlation is not accounted for by previous lags.

Conversely, for the value of q, we focus on the most significant lags from the ACF plot, indicating the correlation between observations at different time lags, which informs us about the lag orders that may require inclusion in the moving average model.

Given that we have differenced all series once, the d value is consistently set to 1. However, when evaluating the Akaike Information Criterion (AIC) and Bayesian Information Criterion (BIC) for model selection, we explore both d=0 and d=1 to ensure comprehensive assessment and comparison of model performance.

Daily vaccinations time series: - q = 0,1,2 - d = 0,1 - p = 0,1,2

People vaccinated time series: - q = 0,1,2,3 - d = 0,1 - p = 0,1

People fully vaccinated time series: - q = 0,1,2,3 - d = 0,1 - p = 0,1

Newly confirmed case time series: - q = 0,1,2,3,4,5,6,7,8,9,10 - d = 0,1 - p = 0,1

Death case time series: - q = 0,1,2,3,4,5,6,7,8,9,10 - d = 0,1 - p = 0,1

Inpatient bed time series: - q = 0,1,2 - d = 0,1 - p = 0,1

Inpatient bed used for COVID time series: - q = 0,1,2 - d = 0,1 - p = 0,1,2

Utilization rate for inpatient bed used for COVID time series: - q = 0,1 - d = 0,1 - p = 0,1,2,3

Unemployment rate time series: - q = 0,1 - d = 0,1 - p = 0,1

Pfizer stock price time series: - q = 0,1,2,3,4,5,6,7,8,9,10 - d = 0,1 - p = 0,1

Support rate for democratic time series: - q = 0,1 - d = 0,1 - p = 0,1

Support rate for independent time series: - q = 0,1 - d = 0,1 - p = 0,1

Support rate for republican time series: - q = 0,1 - d = 0,1 - p = 0,1

Code
# Grid-search ARIMA(p,d,q) specifications for the daily-vaaccinations-derived
# series and record AIC/BIC/AICc for every fit (3 x 2 x 3 = 18 combinations).
# Cleanups vs. the original: removed the dead initial `d = 1` and
# `dvacc = data.frame()` assignments (both immediately overwritten), removed
# the `p - 1 + d + q - 1 <= 8` guard (always TRUE for these ranges, so every
# combination was fitted anyway), and renamed `ls` (which masked base::ls).
# NOTE(review): the model is fitted on diff(d_vacc_ts), so d = 1 here amounts
# to double differencing of the original series -- confirm this is intended.
i <- 1
scores <- matrix(NA_real_, nrow = 18, ncol = 6)

for (p in 0:2) {
  for (q in 0:2) {
    for (d in 0:1) {
      model <- Arima(diff(d_vacc_ts), order = c(p, d, q), include.drift = TRUE)
      scores[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
      i <- i + 1
    }
  }
}

dvacc <- as.data.frame(scores)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 738.4524 742.5543 739.4124
0 1 0 708.6048 711.2692 709.0848
0 0 1 729.7130 735.1822 731.3797
0 1 1 710.4991 714.4957 711.4991
0 0 2 727.4084 734.2449 730.0171
0 1 2 708.3318 713.6606 710.0709
1 0 0 730.5130 735.9822 732.1797
1 1 0 710.5352 714.5318 711.5352
1 0 1 730.2149 737.0513 732.8236
1 1 1 712.4472 717.7760 714.1863
1 0 2 730.9408 739.1446 734.7590
1 1 2 708.3072 714.9682 711.0345
2 0 0 727.3140 734.1505 729.9227
2 1 0 711.4293 716.7581 713.1684
2 0 1 718.6656 726.8694 722.4838
2 1 1 706.1272 712.7882 708.8544
2 0 2 730.3437 739.9148 735.6771
2 1 2 701.6110 709.6042 705.6110
Code
dvacc[which.min(dvacc$AIC),]
   p d q     AIC      BIC    AICc
18 2 1 2 701.611 709.6042 705.611
Code
dvacc[which.min(dvacc$BIC),]
   p d q     AIC      BIC    AICc
18 2 1 2 701.611 709.6042 705.611
Code
dvacc[which.min(dvacc$AICc),]
   p d q     AIC      BIC    AICc
18 2 1 2 701.611 709.6042 705.611

The model with the lowest AIC, BIC, AICc is ARIMA(2,1,2). So the best model is ARIMA(2,1,2).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced people
# vaccinated series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 4 (q) * 2 (d) = 16 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 16, ncol = 6)

for (p in 0:1) {
  for (q in 0:3) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(p_vacc_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 128.2556 132.2522 129.2556
0 1 0 111.3475 113.9392 111.8475
0 0 1 111.4001 116.7289 113.1393
0 1 1 108.9110 112.7985 109.9545
0 0 2 103.5130 110.1741 106.2403
0 1 2 108.8841 114.0674 110.7023
0 0 3 105.4507 113.4439 109.4507
0 1 3 102.3835 108.8627 105.2407
1 0 0 114.2362 119.5650 115.9753
1 1 0 111.2512 115.1387 112.2947
1 0 1 109.0870 115.7480 111.8143
1 1 1 110.8864 116.0697 112.7046
1 0 2 105.4663 113.4596 109.4663
1 1 2 107.7595 114.2387 110.6167
1 0 3 105.7102 115.0357 111.3102
1 1 3 104.2137 111.9888 108.4137
Code
dvacc[which.min(dvacc$AIC),]
  p d q      AIC      BIC     AICc
8 0 1 3 102.3835 108.8627 105.2407
Code
dvacc[which.min(dvacc$BIC),]
  p d q      AIC      BIC     AICc
8 0 1 3 102.3835 108.8627 105.2407
Code
dvacc[which.min(dvacc$AICc),]
  p d q      AIC      BIC     AICc
8 0 1 3 102.3835 108.8627 105.2407

The model with the lowest AIC, BIC, AICc is ARIMA(0,1,3). So the best model is ARIMA(0,1,3).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced people fully
# vaccinated series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 4 (q) * 2 (d) = 16 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 16, ncol = 6)

for (p in 0:1) {
  for (q in 0:3) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(pf_vacc_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 126.67506 130.6717 127.67506
0 1 0 106.80977 109.4014 107.30977
0 0 1 107.40914 112.7380 109.14827
0 1 1 99.34580 103.2333 100.38927
0 0 2 96.18947 102.8505 98.91674
0 1 2 97.58093 102.7643 99.39911
0 0 3 95.60878 103.6020 99.60878
0 1 3 95.34163 101.8208 98.19878
1 0 0 110.72871 116.0575 112.46784
1 1 0 101.27973 105.1672 102.32320
1 0 1 101.89615 108.5572 104.62342
1 1 1 100.28735 105.4707 102.10553
1 0 2 96.71949 104.7127 100.71949
1 1 2 99.41388 105.8931 102.27102
1 0 3 97.32260 106.6480 102.92260
1 1 3 95.30117 103.0762 99.50117
Code
dvacc[which.min(dvacc$AIC),]
   p d q      AIC      BIC     AICc
16 1 1 3 95.30117 103.0762 99.50117
Code
dvacc[which.min(dvacc$BIC),]
  p d q      AIC      BIC     AICc
8 0 1 3 95.34163 101.8208 98.19878
Code
dvacc[which.min(dvacc$AICc),]
  p d q      AIC      BIC     AICc
8 0 1 3 95.34163 101.8208 98.19878

The model with the lowest BIC and AICc is ARIMA(0,1,3). ARIMA(1,1,3) has a marginally lower AIC (95.30 vs. 95.34 — a negligible difference), but ARIMA(0,1,3) achieves lower BIC and AICc with one fewer parameter. Favoring parsimony, ARIMA(0,1,3) emerges as the optimal model.

Code
# Grid search over ARIMA(p, d, q) orders for the differenced newly
# confirmed case series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: of the 2 * 11 * 2 = 44 combinations, 40 satisfy the order
# threshold below; 6 stats per fitted model.
fit_stats <- matrix(NA_real_, nrow = 40, ncol = 6)

for (p in 0:1) {
  for (q in 0:10) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(case_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 1372.358 1377.571 1372.989
0 1 0 1329.589 1333.016 1329.904
0 0 1 1346.930 1353.881 1348.011
0 1 1 1328.008 1333.149 1328.657
0 0 2 1346.896 1355.584 1348.562
0 1 2 1317.681 1324.535 1318.792
0 0 3 1348.882 1359.308 1351.282
0 1 3 1318.245 1326.813 1319.959
0 0 4 1350.638 1362.801 1353.932
0 1 4 1320.171 1330.452 1322.642
0 0 5 1352.273 1366.174 1356.637
0 1 5 1322.083 1334.078 1325.477
0 0 6 1353.981 1369.620 1359.606
0 1 6 1323.493 1337.202 1327.993
0 0 7 1355.958 1373.334 1363.054
0 1 7 1324.984 1340.406 1330.791
0 0 8 1357.880 1376.995 1366.680
0 1 8 1326.827 1343.963 1334.161
0 0 9 1359.015 1379.867 1369.774
0 1 9 1328.814 1347.664 1337.918
0 0 10 1359.727 1382.317 1372.727
1 0 0 1356.483 1363.434 1357.564
1 1 0 1331.098 1336.238 1331.746
1 0 1 1347.042 1355.730 1348.708
1 1 1 1326.755 1333.610 1327.866
1 0 2 1348.887 1359.313 1351.287
1 1 2 1318.278 1326.846 1319.992
1 0 3 1350.894 1363.057 1354.188
1 1 3 1320.189 1330.471 1322.660
1 0 4 1352.529 1366.430 1356.892
1 1 4 1321.988 1333.983 1325.382
1 0 5 1354.097 1369.736 1359.722
1 1 5 1323.990 1337.699 1328.490
1 0 6 1355.970 1373.347 1363.067
1 1 6 1325.066 1340.489 1330.873
1 0 7 1356.421 1375.536 1365.221
1 1 7 1326.889 1344.025 1334.222
1 0 8 1357.666 1378.518 1368.425
1 1 8 1328.824 1347.673 1337.927
1 0 9 1359.205 1381.794 1372.205
Code
dvacc[which.min(dvacc$AIC),]
  p d q      AIC      BIC     AICc
6 0 1 2 1317.681 1324.535 1318.792
Code
dvacc[which.min(dvacc$BIC),]
  p d q      AIC      BIC     AICc
6 0 1 2 1317.681 1324.535 1318.792
Code
dvacc[which.min(dvacc$AICc),]
  p d q      AIC      BIC     AICc
6 0 1 2 1317.681 1324.535 1318.792

The model with the lowest AIC, BIC, AICc is ARIMA(0,1,2). So the best model is ARIMA(0,1,2).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced death case
# series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: of the 2 * 11 * 2 = 44 combinations, 40 satisfy the order
# threshold below; 6 stats per fitted model.
fit_stats <- matrix(NA_real_, nrow = 40, ncol = 6)

for (p in 0:1) {
  for (q in 0:10) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(dead_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 961.2278 966.4408 961.8594
0 1 0 909.6730 913.1001 909.9887
0 0 1 922.8001 929.7507 923.8811
0 1 1 891.0065 896.1472 891.6552
0 0 2 907.6571 916.3455 909.3238
0 1 2 892.9500 899.8043 894.0611
0 0 3 906.6789 917.1049 909.0789
0 1 3 888.5781 897.1459 890.2923
0 0 4 908.6724 920.8360 911.9665
0 1 4 887.4925 897.7739 889.9630
0 0 5 904.2750 918.1763 908.6386
0 1 5 889.4303 901.4253 892.8243
0 0 6 906.0467 921.6857 911.6717
0 1 6 885.7971 899.5057 890.2971
0 0 7 907.9810 925.3577 915.0777
0 1 7 887.2764 902.6986 893.0829
0 0 8 907.8380 926.9523 916.6380
0 1 8 889.0024 906.1381 896.3357
0 0 9 908.6709 929.5230 919.4296
0 1 9 889.1873 908.0365 898.2907
0 0 10 909.5623 932.1520 922.5623
1 0 0 931.3810 938.3317 932.4621
1 1 0 905.2606 910.4013 905.9092
1 0 1 908.9242 917.6126 910.5909
1 1 1 892.9864 899.8407 894.0975
1 0 2 907.2712 917.6972 909.6712
1 1 2 889.2626 897.8304 890.9768
1 0 3 908.6759 920.8396 911.9700
1 1 3 887.9449 898.2263 890.4155
1 0 4 908.7772 922.6785 913.1408
1 1 4 889.4598 901.4548 892.8537
1 0 5 910.2747 925.9138 915.8997
1 1 5 888.9612 902.6698 893.4612
1 0 6 911.7520 929.1287 918.8487
1 1 6 887.4447 902.8669 893.2512
1 0 7 910.3910 929.5053 919.1910
1 1 7 889.1595 906.2952 896.4928
1 0 8 909.0411 929.8931 919.7997
1 1 8 892.1919 911.0412 901.2953
1 0 9 910.6741 933.2638 923.6741
Code
dvacc[which.min(dvacc$AIC),]
   p d q      AIC      BIC     AICc
14 0 1 6 885.7971 899.5057 890.2971
Code
dvacc[which.min(dvacc$BIC),]
  p d q      AIC      BIC     AICc
4 0 1 1 891.0065 896.1472 891.6552
Code
dvacc[which.min(dvacc$AICc),]
   p d q      AIC      BIC    AICc
10 0 1 4 887.4925 897.7739 889.963

Among the ARIMA models tested, ARIMA(0,1,6) has the lowest AIC value, indicating its superior fit compared to the other models in terms of goodness of fit and complexity. Conversely, ARIMA(0,1,1) boasts the lowest BIC, while ARIMA(0,1,4) exhibits the lowest AICc. Despite these distinctions, a comprehensive evaluation considering all metrics suggests that ARIMA(0,1,6) is the optimal choice, as it strikes a balance between model complexity and performance. Therefore, ARIMA(0,1,6) emerges as the preferred model based on a holistic assessment of AIC, BIC, and AICc values.

Code
# Grid search over ARIMA(p, d, q) orders for the differenced inpatient bed
# series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 3 (q) * 2 (d) = 12 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 12, ncol = 6)

for (p in 0:1) {
  for (q in 0:2) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(hos_ts1), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 1201.369 1206.983 1201.915
0 1 0 1194.581 1198.281 1194.853
0 0 1 1200.550 1208.034 1201.480
0 1 1 1180.010 1185.561 1180.568
0 0 2 1199.638 1208.994 1201.067
0 1 2 1180.156 1187.556 1181.108
1 0 0 1200.572 1208.057 1201.502
1 1 0 1191.430 1196.981 1191.988
1 0 1 1202.437 1211.793 1203.866
1 1 1 1180.024 1187.424 1180.976
1 0 2 1183.551 1194.779 1185.600
1 1 2 1181.936 1191.187 1183.399
Code
dvacc[which.min(dvacc$AIC),]
  p d q     AIC      BIC     AICc
4 0 1 1 1180.01 1185.561 1180.568
Code
dvacc[which.min(dvacc$BIC),]
  p d q     AIC      BIC     AICc
4 0 1 1 1180.01 1185.561 1180.568
Code
dvacc[which.min(dvacc$AICc),]
  p d q     AIC      BIC     AICc
4 0 1 1 1180.01 1185.561 1180.568

The model with the lowest AIC, BIC, AICc is ARIMA(0,1,1). So the best model is ARIMA(0,1,1).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced inpatient bed
# used for COVID series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 3 (p) * 3 (q) * 2 (d) = 18 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 18, ncol = 6)

for (p in 0:2) {
  for (q in 0:2) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(hos_ts2), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 1111.642 1117.255 1112.187
0 1 0 1111.820 1115.521 1112.093
0 0 1 1109.362 1116.847 1110.292
0 1 1 1093.468 1099.019 1094.027
0 0 2 1099.705 1109.061 1101.133
0 1 2 1090.587 1097.988 1091.540
1 0 0 1112.394 1119.879 1113.324
1 1 0 1112.938 1118.489 1113.497
1 0 1 1109.563 1118.919 1110.992
1 1 1 1093.881 1101.282 1094.834
1 0 2 1098.786 1110.013 1100.835
1 1 2 1091.077 1100.327 1092.540
2 0 0 1102.004 1111.360 1103.433
2 1 0 1102.553 1109.954 1103.506
2 0 1 1098.674 1109.902 1100.723
2 1 1 1084.503 1093.754 1085.966
2 0 2 1099.303 1112.401 1102.103
2 1 2 1085.436 1096.537 1087.536
Code
dvacc[which.min(dvacc$AIC),]
   p d q      AIC      BIC     AICc
16 2 1 1 1084.503 1093.754 1085.966
Code
dvacc[which.min(dvacc$BIC),]
   p d q      AIC      BIC     AICc
16 2 1 1 1084.503 1093.754 1085.966
Code
dvacc[which.min(dvacc$AICc),]
   p d q      AIC      BIC     AICc
16 2 1 1 1084.503 1093.754 1085.966

The model with the lowest AIC, BIC, AICc is ARIMA(2,1,1). So the best model is ARIMA(2,1,1).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced utilization
# rate series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 4 (p) * 2 (q) * 2 (d) = 16 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 16, ncol = 6)

for (p in 0:3) {
  for (q in 0:1) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(hos_ts3), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 -207.1751 -201.5615 -206.6296
0 1 0 -182.0062 -178.3059 -181.7335
0 0 1 -212.0688 -204.5840 -211.1386
0 1 1 -197.8732 -192.3228 -197.3151
1 0 0 -207.1813 -199.6965 -206.2511
1 1 0 -180.4098 -174.8594 -179.8517
1 0 1 -211.3958 -202.0398 -209.9673
1 1 1 -198.3048 -190.9042 -197.3524
2 0 0 -219.0286 -209.6726 -217.6000
2 1 0 -193.7762 -186.3756 -192.8238
2 0 1 -217.1926 -205.9654 -215.1438
2 1 1 -209.0758 -199.8250 -207.6124
3 0 0 -217.1759 -205.9487 -215.1272
3 1 0 -198.2588 -189.0080 -196.7953
3 0 1 -215.1803 -202.0819 -212.3803
3 1 1 -207.1298 -196.0290 -205.0298
Code
dvacc[which.min(dvacc$AIC),]
  p d q       AIC       BIC   AICc
9 2 0 0 -219.0286 -209.6726 -217.6
Code
dvacc[which.min(dvacc$BIC),]
  p d q       AIC       BIC   AICc
9 2 0 0 -219.0286 -209.6726 -217.6
Code
dvacc[which.min(dvacc$AICc),]
  p d q       AIC       BIC   AICc
9 2 0 0 -219.0286 -209.6726 -217.6

The model with the lowest AIC, BIC, AICc is ARIMA(2,0,0). So the best model is ARIMA(2,0,0).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced unemployment
# rate series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 2 (q) * 2 (d) = 8 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 8, ncol = 6)

for (p in 0:1) {
  for (q in 0:1) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(unemploy_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 -120.7451 -116.97081 -119.65419
0 1 0 -99.4529 -97.01515 -98.90744
0 0 1 -118.7475 -113.71510 -116.84272
0 1 1 -111.6317 -107.97505 -110.48882
1 0 0 -118.7467 -113.71428 -116.84190
1 1 0 -101.9969 -98.34025 -100.85402
1 0 1 -119.6972 -113.40668 -116.69716
1 1 1 -109.6552 -104.77973 -107.65524
Code
dvacc[which.min(dvacc$AIC),]
  p d q       AIC       BIC      AICc
1 0 0 0 -120.7451 -116.9708 -119.6542
Code
dvacc[which.min(dvacc$BIC),]
  p d q       AIC       BIC      AICc
1 0 0 0 -120.7451 -116.9708 -119.6542
Code
dvacc[which.min(dvacc$AICc),]
  p d q       AIC       BIC      AICc
1 0 0 0 -120.7451 -116.9708 -119.6542

According to the table, ARIMA(0,0,0) attains the lowest AIC, BIC, and AICc (-120.75, -116.97, -119.65). However, a (0,0,0) specification is white noise with drift and captures no serial dynamics. The next-best candidate, ARIMA(1,0,1), has an AIC of -119.70 — within about one AIC unit of the minimum — while modeling the autocorrelation structure of the series. Therefore, based on this trade-off, ARIMA(1,0,1) is selected as the optimal model.

Code
# Grid search over ARIMA(p, d, q) orders for the differenced Pfizer stock
# series. Some candidates fail to converge ("non-stationary AR part"), so
# each fit is wrapped in tryCatch; failures are reported and skipped.
i <- 1
# NOTE: `ls` shadows base::ls(); the name is kept because the next chunk
# reads it. Sized for the 39 candidates that fit successfully (one of the
# 40 order combinations passing the threshold errors out).
ls <- matrix(NA_real_, nrow = 39, ncol = 6)

for (p in 0:1) {
  for (q in 0:10) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        tryCatch({
          model <- Arima(diff(stock_ts), order = c(p, d, q),
                         include.drift = TRUE)
          ls[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
          # Only advance the row index on a successful fit.
          i <- i + 1
        }, error = function(e) {
          cat("Error occurred for p =", p, ", d =", d, ", q =", q, ":", conditionMessage(e), "\n")
        })
      }
    }
  }
}
Error occurred for p = 1 , d = 0 , q = 1 : non-stationary AR part from CSS 
Code
dvacc = as.data.frame(ls)
names(dvacc) = c("p", "d", "q", "AIC", "BIC", "AICc")


#dvacc
knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 91.69192 97.24237 92.25006
0 1 0 128.69892 132.35620 128.97799
0 0 1 90.91191 98.31250 91.86429
0 1 1 94.70812 100.19405 95.27955
0 0 2 89.33705 98.58779 90.80046
0 1 2 94.43328 101.74784 95.40889
0 0 3 90.24920 101.35008 92.34920
0 1 3 92.35282 101.49603 93.85282
0 0 4 89.17360 102.12464 92.04540
0 1 4 93.67619 104.64804 95.83004
0 0 5 89.61851 104.41969 93.40798
0 1 5 95.67588 108.47637 98.62325
0 0 6 90.89691 107.54824 95.76177
0 1 6 96.20192 110.83105 100.09381
0 0 7 89.42175 107.92323 95.53286
0 1 7 97.22498 113.68275 102.22498
0 0 8 90.76309 111.11471 98.30595
0 1 8 95.06742 113.35384 101.35314
0 0 9 92.66751 114.86928 101.84398
0 1 9 96.83202 116.94708 104.59673
0 0 10 91.09034 115.14226 102.12064
1 0 0 89.34164 96.74223 90.29402
1 1 0 98.25570 103.74162 98.82712
1 1 1 95.71995 103.03452 96.69556
1 0 2 90.44847 101.54935 92.54847
1 1 2 91.32693 100.47013 92.82693
1 0 3 87.11988 100.07091 89.99168
1 1 3 93.64810 104.61995 95.80195
1 0 4 89.08540 103.88658 92.87488
1 1 4 94.19910 106.99959 97.14647
1 0 5 90.56824 107.21957 95.43310
1 1 5 96.18703 110.81617 100.07893
1 0 6 91.95168 110.45316 98.06279
1 1 6 96.74788 113.20565 101.74788
1 0 7 90.73691 111.08853 98.27976
1 1 7 98.64202 116.92843 104.92773
1 0 8 92.68015 114.88192 101.85662
1 1 8 96.65387 116.76892 104.41857
1 0 9 94.65167 118.70359 105.68197
Code
dvacc[which.min(dvacc$AIC),]
   p d q      AIC      BIC     AICc
27 1 0 3 87.11988 100.0709 89.99168
Code
dvacc[which.min(dvacc$BIC),]
   p d q      AIC      BIC     AICc
22 1 0 0 89.34164 96.74223 90.29402
Code
dvacc[which.min(dvacc$AICc),]
   p d q      AIC      BIC     AICc
27 1 0 3 87.11988 100.0709 89.99168

The model with the lowest AIC, AICc is ARIMA(1,0,3). While the model with the lowest BIC is ARIMA(1,0,0), the significantly lower AIC and AICc values of ARIMA(1,0,3) underscore its stronger performance. Therefore, based on the evaluation metrics, ARIMA(1,0,3) emerges as the optimal model.

Code
# Grid search over ARIMA(p, d, q) orders for the differenced democratic
# support rate series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 2 (q) * 2 (d) = 8 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 8, ncol = 6)

for (p in 0:1) {
  for (q in 0:1) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(demo_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 -201.1013 -195.4877 -200.5559
0 1 0 -146.0575 -142.3572 -145.7848
0 0 1 -227.4413 -219.9565 -226.5110
0 1 1 -191.9260 -186.3755 -191.3678
1 0 0 -215.5599 -208.0751 -214.6297
1 1 0 -181.3931 -175.8427 -180.8350
1 0 1 -225.4535 -216.0975 -224.0249
1 1 1 -205.1833 -197.7827 -204.2309
Code
dvacc[which.min(dvacc$AIC),]
  p d q       AIC       BIC     AICc
3 0 0 1 -227.4413 -219.9565 -226.511
Code
dvacc[which.min(dvacc$BIC),]
  p d q       AIC       BIC     AICc
3 0 0 1 -227.4413 -219.9565 -226.511
Code
dvacc[which.min(dvacc$AICc),]
  p d q       AIC       BIC     AICc
3 0 0 1 -227.4413 -219.9565 -226.511

The model with the lowest AIC, BIC, AICc is ARIMA(0,0,1). So the best model is ARIMA(0,0,1).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced independent
# support rate series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 2 (q) * 2 (d) = 8 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 8, ncol = 6)

for (p in 0:1) {
  for (q in 0:1) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(inde_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 -145.71805 -140.10445 -145.17260
0 1 0 -90.35146 -86.65116 -90.07873
0 0 1 -170.47709 -162.99229 -169.54686
0 1 1 -137.69655 -132.14611 -137.13841
1 0 0 -162.58903 -155.10423 -161.65880
1 1 0 -130.38563 -124.83519 -129.82749
1 0 1 -168.66260 -159.30660 -167.23403
1 1 1 -153.28254 -145.88195 -152.33015
Code
dvacc[which.min(dvacc$AIC),]
  p d q       AIC       BIC      AICc
3 0 0 1 -170.4771 -162.9923 -169.5469
Code
dvacc[which.min(dvacc$BIC),]
  p d q       AIC       BIC      AICc
3 0 0 1 -170.4771 -162.9923 -169.5469
Code
dvacc[which.min(dvacc$AICc),]
  p d q       AIC       BIC      AICc
3 0 0 1 -170.4771 -162.9923 -169.5469

The model with the lowest AIC, BIC, AICc is ARIMA(0,0,1). So the best model is ARIMA(0,0,1).

Code
# Grid search over ARIMA(p, d, q) orders for the differenced republican
# support rate series, recording AIC / BIC / AICc for model selection.
i <- 1
# Pre-allocate: 2 (p) * 2 (q) * 2 (d) = 8 candidate models, 6 stats each.
fit_stats <- matrix(NA_real_, nrow = 8, ncol = 6)

for (p in 0:1) {
  for (q in 0:1) {
    for (d in 0:1) {
      # Usual threshold on total model order; equivalent to the original
      # condition p - 1 + d + q - 1 <= 8.
      if (p + d + q <= 10) {
        model <- Arima(diff(rep_ts), order = c(p, d, q),
                       include.drift = TRUE)
        fit_stats[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        i <- i + 1
      }
    }
  }
}

dvacc <- as.data.frame(fit_stats)
names(dvacc) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(dvacc)
p d q AIC BIC AICc
0 0 0 -219.1727 -213.5591 -218.6273
0 1 0 -175.8871 -172.1868 -175.6144
0 0 1 -223.3828 -215.8980 -222.4526
0 1 1 -209.6209 -204.0705 -209.0628
1 0 0 -218.4539 -210.9691 -217.5237
1 1 0 -186.6854 -181.1350 -186.1273
1 0 1 -229.4175 -220.0615 -227.9889
1 1 1 -208.5970 -201.1964 -207.6446
Code
dvacc[which.min(dvacc$AIC),]
  p d q       AIC       BIC      AICc
7 1 0 1 -229.4175 -220.0615 -227.9889
Code
dvacc[which.min(dvacc$BIC),]
  p d q       AIC       BIC      AICc
7 1 0 1 -229.4175 -220.0615 -227.9889
Code
dvacc[which.min(dvacc$AICc),]
  p d q       AIC       BIC      AICc
7 1 0 1 -229.4175 -220.0615 -227.9889

The model with the lowest AIC, BIC, AICc is ARIMA(1,0,1). So the best model is ARIMA(1,0,1).

In terms of AIC, BIC and AICc, we always want to choose the lowest values, however, it can happen that the same model won’t have the lowest value for AIC, BIC, and AICc at the same time. In that case we favor the results from AIC as that is a better estimator for autoregressive models. In the next section we can see the results from the AIC-BIC analysis.

Final Selection:

  • Daily vaccinations time series: p=2, d=1, q=2
  • People vaccinated time series: p=0, d=1, q=3
  • People fully vaccinated time series: p=0, d=1, q=3
  • Newly confirmed case time series: p=0, d=1, q=2
  • Death case time series: p=0, d=1, q=6
  • Inpatient bed time series: p=0, d=1, q=1
  • Inpatient bed used for COVID time series: p=2, d=1, q=1
  • Utilization rate for inpatient bed used for COVID time series: p=2, d=0, q=0
  • Unemployment rate time series: p=1, d=0, q=1
  • Pfizer stock price time series: p=1, d=0, q=3
  • Support rate for democratic time series: p=0, d=0, q=1
  • Support rate for independent time series: p=0, d=0, q=1
  • Support rate for republican time series: p=1, d=0, q=1

5. Fitting the best model

Code
fit_d_vacc_AR <- Arima(diff(d_vacc_ts), order=c(2, 1, 2),include.drift = TRUE) 
summary(fit_d_vacc_AR)
Series: diff(d_vacc_ts) 
ARIMA(2,1,2) with drift 

Coefficients:
         ar1      ar2      ma1     ma2     drift
      1.2897  -0.8477  -1.9859  0.9999   76.8714
s.e.  0.1167   0.1099   0.2042  0.2043  466.8502

sigma^2 = 2.446e+09:  log likelihood = -344.81
AIC=701.61   AICc=705.61   BIC=709.6

Training set error measures:
                    ME    RMSE      MAE      MPE    MAPE      MASE     ACF1
Training set -12609.01 44043.5 29611.64 -124.755 294.076 0.3295796 -0.17266

The equation for the model is: \[x_t = 1.2897x_{t-1} - 0.8477x_{t-2} + w_t - 1.9859w_{t-1} + 0.9999w_{t-2}\]

Code
fit_p_vacc_AR <- Arima(diff(p_vacc_ts), order=c(0, 1, 3),include.drift = TRUE) 
summary(fit_p_vacc_AR)
Series: diff(p_vacc_ts) 
ARIMA(0,1,3) with drift 

Coefficients:
         ma1      ma2      ma3    drift
      0.2969  -0.2969  -1.0000  -0.2897
s.e.  0.3100   0.2357   0.4322   0.0827

sigma^2 = 1.625:  log likelihood = -46.19
AIC=102.38   AICc=105.24   BIC=108.86

Training set error measures:
                      ME     RMSE       MAE      MPE     MAPE      MASE
Training set -0.08268841 1.155475 0.7479236 18.18607 72.88163 0.1936982
                    ACF1
Training set -0.03066154

The equation for the model is: \[x_t = w_t + 0.2969w_{t-1} - 0.2969w_{t-2} - 1.0000w_{t-3} \]

Code
fit_pf_vacc_AR <- Arima(diff(pf_vacc_ts), order=c(0, 1, 3),include.drift = TRUE) 
summary(fit_pf_vacc_AR)
Series: diff(pf_vacc_ts) 
ARIMA(0,1,3) with drift 

Coefficients:
         ma1      ma2      ma3    drift
      0.2578  -0.4132  -0.8446  -0.2678
s.e.  0.1986   0.2289   0.2101   0.0722

sigma^2 = 1.385:  log likelihood = -42.67
AIC=95.34   AICc=98.2   BIC=101.82

Training set error measures:
                     ME     RMSE       MAE      MPE     MAPE      MASE
Training set 0.01757527 1.066799 0.7665792 13.17391 135.8925 0.2175191
                  ACF1
Training set 0.1836963

The equation for the model is: \[x_t = w_t + 0.2578w_{t-1} - 0.4132w_{t-2} - 0.8446w_{t-3} \]

Code
fit_case_AR <- Arima(diff(case_ts), order=c(0, 1, 2),include.drift = TRUE) 
summary(fit_case_AR)
Series: diff(case_ts) 
ARIMA(0,1,2) with drift 

Coefficients:
          ma1      ma2       drift
      -0.0254  -0.6635   -5440.876
s.e.   0.1347   0.1331  112821.232

sigma^2 = 4.582e+12:  log likelihood = -654.84
AIC=1317.68   AICc=1318.79   BIC=1324.53

Training set error measures:
                   ME    RMSE     MAE       MPE     MAPE      MASE      ACF1
Training set 58874.29 2036015 1092948 -54.67793 86.34503 0.4079362 0.1041066

The equation for the model is: \[x_t = w_t - 0.0254w_{t-1} - 0.6635w_{t-2} \]

Code
fit_dead_AR <- Arima(diff(dead_ts), order=c(0, 1, 6),include.drift = TRUE) 
summary(fit_dead_AR)
Series: diff(dead_ts) 
ARIMA(0,1,6) with drift 

Coefficients:
         ma1      ma2      ma3      ma4     ma5     ma6      drift
      0.8533  -0.3695  -1.0757  -1.0900  0.0148  0.6672  -864.6125
s.e.  0.2409   0.3200   0.2219   0.2703  0.2351  0.2069   362.8723

sigma^2 = 84619984:  log likelihood = -434.9
AIC=885.8   AICc=890.3   BIC=899.51

Training set error measures:
                   ME     RMSE      MAE     MPE    MAPE      MASE        ACF1
Training set 1373.716 8276.587 6716.487 -24.481 66.6359 0.2869461 -0.09912142

The equation for the model is: \[x_t = w_t + 0.8533w_{t-1} - 0.3695w_{t-2} - 1.0757w_{t-3} - 1.0900w_{t-4} + 0.0148w_{t-5} + 0.6672w_{t-6} \]

Code
fit_hos1_AR <- Arima(diff(hos_ts1), order=c(0, 1, 1),include.drift = TRUE) 
summary(fit_hos1_AR)
Series: diff(hos_ts1) 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1      drift
      -0.8352  -2100.377
s.e.   0.0997   1731.589

sigma^2 = 4.203e+09:  log likelihood = -587.01
AIC=1180.01   AICc=1180.57   BIC=1185.56

Training set error measures:
                   ME     RMSE      MAE      MPE     MAPE      MASE      ACF1
Training set 1488.251 62773.32 28941.07 231.9019 1121.634 0.9238171 0.2046115

The equation for the model is: \[x_t = w_t - 0.8352w_{t-1} \]

Code
fit_hos2_AR <- Arima(diff(hos_ts2), order=c(2, 1, 1),include.drift = TRUE) 
summary(fit_hos2_AR)
Series: diff(hos_ts2) 
ARIMA(2,1,1) with drift 

Coefficients:
         ar1      ar2      ma1      drift
      0.2460  -0.4570  -1.0000  -184.7783
s.e.  0.1276   0.1247   0.0625   189.0308

sigma^2 = 491138696:  log likelihood = -537.25
AIC=1084.5   AICc=1085.97   BIC=1093.75

Training set error measures:
                    ME     RMSE      MAE      MPE     MAPE      MASE
Training set -1297.816 20975.66 13210.73 -135.526 364.1159 0.5416423
                    ACF1
Training set -0.06551829

The equation for the model is: \[x_t = 0.2460x_{t-1} - 0.4570x_{t-2} + w_t - 1.0000w_{t-1} \]

Code
fit_hos3_AR <- Arima(diff(hos_ts3), order=c(2, 0, 0),include.drift = TRUE) 
summary(fit_hos3_AR)
Series: diff(hos_ts3) 
ARIMA(2,0,0) with drift 

Coefficients:
        ar1      ar2  intercept   drift
      0.305  -0.4961     0.0038  -1e-04
s.e.  0.124   0.1220     0.0056   2e-04

sigma^2 = 0.0005341:  log likelihood = 114.51
AIC=-219.03   AICc=-217.6   BIC=-209.67

Training set error measures:
                       ME       RMSE       MAE      MPE     MAPE      MASE
Training set 0.0001589413 0.02212643 0.0145853 73.24355 106.4801 0.5439367
                    ACF1
Training set -0.02088889

The equation for the model is: \[x_t = 0.305x_{t-1} - 0.4961x_{t-2} \]

Code
fit_unemploy_AR <- Arima(diff(unemploy_ts), order=c(1, 0, 1),include.drift = TRUE) 
summary(fit_unemploy_AR)
Series: diff(unemploy_ts) 
ARIMA(1,0,1) with drift 

Coefficients:
         ar1      ma1  intercept   drift
      0.6683  -1.0000     0.0039  -4e-04
s.e.  0.1782   0.1018     0.0054   6e-04

sigma^2 = 0.0004398:  log likelihood = 64.85
AIC=-119.7   AICc=-116.7   BIC=-113.41

Training set error measures:
                      ME       RMSE         MAE  MPE MAPE      MASE      ACF1
Training set 0.001267183 0.01929114 0.008045807 -Inf  Inf 0.6023599 0.1298093

The equation for the model is: \[x_t = 0.6683x_{t-1} + w_t - 1.0000w_{t-1}\]

Code
fit_stock_AR <- Arima(diff(stock_ts), order=c(1, 0, 3),include.drift = TRUE) 
summary(fit_stock_AR)
Series: diff(stock_ts) 
ARIMA(1,0,3) with drift 

Coefficients:
         ar1      ma1     ma2      ma3  intercept    drift
      0.5911  -1.0179  0.4384  -0.4204     0.0292  -0.0057
s.e.  0.2132   0.2324  0.1997   0.1771     0.0608   0.0025

sigma^2 = 0.3012:  log likelihood = -36.56
AIC=87.12   AICc=89.99   BIC=100.07

Training set error measures:
                     ME      RMSE       MAE      MPE     MAPE      MASE
Training set 0.01245506 0.5125679 0.3725768 53.53454 101.9605 0.6408168
                    ACF1
Training set -0.01878968

The equation for the model is: \[x_t = 0.5911x_{t-1} + w_t - 1.0179w_{t-1} + 0.4384w_{t-2} - 0.4204w_{t-3} \]

Code
fit_demo_AR <- Arima(diff(demo_ts), order=c(0, 0, 1),include.drift = TRUE) 
summary(fit_demo_AR)
Series: diff(demo_ts) 
ARIMA(0,0,1) with drift 

Coefficients:
          ma1  intercept  drift
      -1.0000     -9e-04      0
s.e.   0.0577        NaN    NaN

sigma^2 = 0.0004267:  log likelihood = 117.72
AIC=-227.44   AICc=-226.51   BIC=-219.96

Training set error measures:
                        ME       RMSE       MAE MPE MAPE      MASE        ACF1
Training set -0.0002909499 0.02000063 0.0157191 NaN  Inf 0.5052568 0.002088654

The equation for the model is: \[x_t = w_t - 1.0000w_{t-1} \]

Code
fit_inde_AR <- Arima(diff(inde_ts), order=c(0, 0, 1),include.drift = TRUE) 
summary(fit_inde_AR)
Series: diff(inde_ts) 
ARIMA(0,0,1) with drift 

Coefficients:
          ma1  intercept  drift
      -1.0000     0.0011      0
s.e.   0.0591        NaN    NaN

sigma^2 = 0.001398:  log likelihood = 89.24
AIC=-170.48   AICc=-169.55   BIC=-162.99

Training set error measures:
                       ME       RMSE        MAE  MPE MAPE      MASE       ACF1
Training set -0.001342649 0.03620296 0.02893673 -Inf  Inf 0.4451804 0.06199612

The equation for the model is: \[x_t = w_t - 1.0000w_{t-1} \]

Code
fit_rep_AR <- Arima(diff(rep_ts), order=c(1, 0, 1),include.drift = TRUE) 
summary(fit_rep_AR)
Series: diff(rep_ts) 
ARIMA(1,0,1) with drift 

Coefficients:
         ar1      ma1  intercept  drift
      0.4095  -1.0000      1e-04      0
s.e.  0.1345   0.0627        NaN    NaN

sigma^2 = 0.0004088:  log likelihood = 119.71
AIC=-229.42   AICc=-227.99   BIC=-220.06

Training set error measures:
                       ME       RMSE        MAE MPE MAPE      MASE       ACF1
Training set 0.0007249485 0.01935823 0.01596133 NaN  Inf 0.6245738 0.06783213

The equation for the model is: \[x_t = 0.4095x_{t-1} + w_t - 1.0000w_{t-1} \]

6. Full Model Diagnostics

Code
d_vacc_full <- capture.output(sarima(diff(d_vacc_ts),2, 1, 2))

Code
cat(d_vacc_full[58:72], d_vacc_full[length(d_vacc_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
         Estimate       SE t.value p.value
ar1        1.2897   0.1167 11.0487  0.0000
ar2       -0.8477   0.1099 -7.7104  0.0000
ma1       -1.9859   0.2042 -9.7273  0.0000
ma2        0.9999   0.2043  4.8941  0.0001
constant  76.8714 466.8502  0.1647  0.8707

sigma^2 estimated as 2009108752 on 23 degrees of freedom 
 
AIC = 25.05754  AICc = 25.15494  BIC = 25.34301 
 
 

The Standard Residual Plot presents an encouraging picture, exhibiting characteristics of good stationarity with a relatively constant mean and variance. This stability is a positive sign for the model’s accuracy. Furthermore, the Autocorrelation Function (ACF) plot reinforces this positive assessment by showing a lack of significant correlation among the residuals, suggesting that the model has effectively captured the underlying patterns in the data, leaving behind what appears to be mere white noise. This is indicative of an exceptionally well-fitted model.

The Quantile-Quantile (Q-Q) Plot also leans towards a favorable evaluation, demonstrating a reasonable approximation of normality, though with some variability. This slight deviation does not detract from the overall model’s effectiveness.

The Ljung-Box test supports this picture as well: despite some variation, its p-values exceed the 0.05 threshold (at the 5% significance level), implying no significant residual autocorrelation. Coupled with the AR and MA coefficient p-values all falling below 0.05 (only the constant is non-significant), this further validates the model’s robustness, suggesting a commendable fit to the observed data.

Code
# Full diagnostics for the ARIMA(0,1,3) model of the differenced
# people-vaccinated series; printed output captured for selective display
p_vacc_full <- capture.output(sarima(diff(p_vacc_ts),0, 1, 3))

Code
# Show only the coefficient table and information criteria from the capture
cat(p_vacc_full[21:34], p_vacc_full[length(p_vacc_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
         Estimate     SE t.value p.value
ma1        0.2969 0.3100  0.9578  0.3481
ma2       -0.2969 0.2357 -1.2599  0.2203
ma3       -1.0000 0.4322 -2.3138  0.0300
constant  -0.2897 0.0827 -3.5037  0.0019

sigma^2 estimated as 1.38457 on 23 degrees of freedom 
 
AIC = 3.791982  AICc = 3.859322  BIC = 4.031952 
 
 

The Standard Residual Plot demonstrates commendable stationarity, with a consistently constant mean and variance, suggesting the model’s robustness in capturing the data’s central tendencies and spread. The Autocorrelation Function (ACF) plot further reinforces the model’s efficacy, showing negligible correlation among residuals and implying that the model has adeptly isolated and left behind only white noise. This is indicative of an exceptionally well-fitted model.

In the realm of normality assessment, the Quantile-Quantile (Q-Q) Plot exhibits a satisfactory alignment with normality, although there is room for improvement in mirroring the ideal normal distribution curve more closely. The Ljung-Box test results introduce a nuanced perspective, with values straddling the 0.05 (5% significance) threshold. This indicates a lack of substantial autocorrelation, underscoring the model’s aptitude in fitting the data without overlooking significant patterns.

The analysis of Moving Average parameters reveals a differentiated significance; while the p-values for ma1 and ma2 do not denote statistical significance, falling above the 0.05 threshold, the p-value for ma3 stands out by being less than 0.05. This suggests that while the first two parameters may not contribute significantly to the model, ma3 plays a crucial role, offering insights into the subtleties of the model’s fit and the dynamics captured by this specific parameter.

Code
# Full diagnostics for the ARIMA(0,1,3) model of the differenced
# people-fully-vaccinated series
pf_vacc_full <- capture.output(sarima(diff(pf_vacc_ts),0, 1, 3))

Code
# Show only the coefficient table and information criteria from the capture
cat(pf_vacc_full[20:33], pf_vacc_full[length(pf_vacc_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
         Estimate     SE t.value p.value
ma1        0.2578 0.1986  1.2980  0.2071
ma2       -0.4132 0.2289 -1.8054  0.0841
ma3       -0.8446 0.2101 -4.0202  0.0005
constant  -0.2678 0.0722 -3.7112  0.0011

sigma^2 estimated as 1.180211 on 23 degrees of freedom 
 
AIC = 3.531172  AICc = 3.598512  BIC = 3.771141 
 
 

The Standard Residual Plot presents an encouraging picture of stationarity, characterized by a largely constant mean and variation, indicative of a robust model. The Autocorrelation Function (ACF) plot further bolsters this assessment, displaying an absence of correlation and suggesting that the model has effectively captured the underlying process, leaving only white noise. This is a strong marker of an excellently fitted model. While the Quantile-Quantile (Q-Q) Plot demonstrates a commendable degree of normality, minor deviations are observable, pointing towards an area for potential refinement.

The Ljung-Box test results introduce an element of variability, with values crossing the 0.05 (5% significance) threshold, yet this does not denote significant autocorrelation, reinforcing the model’s adequacy. Although the p-values for ma1 and ma2 slightly exceed 0.05, the p-value for ma3 falls below this mark, suggesting that while certain model parameters may edge towards marginal significance, the overall model integrity remains intact, pointing towards a well-specified model.

Code
# Full diagnostics for the ARIMA(0,1,2) model of the differenced case series
case_full <- capture.output(sarima(diff(case_ts),0, 1, 2))

Code
# Show only the coefficient table and information criteria from the capture
cat(case_full[20:32], case_full[length(case_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
           Estimate          SE t.value p.value
ma1         -0.0254      0.1347 -0.1884  0.8515
ma2         -0.6635      0.1331 -4.9840  0.0000
constant -5440.8755 112821.2319 -0.0482  0.9618

sigma^2 estimated as 4.246465e+12 on 38 degrees of freedom 
 
AIC = 32.13855  AICc = 32.15437  BIC = 32.30573 
 
 

The Standard Residual Plot presents a promising depiction of stationarity, characterized by a consistent mean and variation across the board. The Autocorrelation Function (ACF) plot reinforces this positive assessment, showing no discernible correlation and implying that all residual patterns have been effectively captured by the model, leaving behind only white noise. This is a strong indicator of an excellent model fit.

Further analysis through the Quantile-Quantile (Q-Q) Plot suggests a satisfactory alignment with normality, although there is room for slight improvement. The Ljung-Box test results introduce some variability, with values surpassing the 0.05 threshold (indicative of a 5% significance level). This outcome points to the lack of substantial correlation, further affirming the model’s aptness.

Regarding the moving average parameters, the p-value associated with ma1 exceeds the 0.05 mark, contrasting with ma2’s p-value, which falls below this threshold. This differential suggests a nuanced interplay within the model’s components, highlighting areas of both strength and potential refinement.

Death case time series

Code
# Full diagnostics for the ARIMA(0,1,6) model of the differenced deaths series
dead_full <- capture.output(sarima(diff(dead_ts),0, 1, 6))

Code
# Show only the coefficient table and information criteria from the capture
cat(dead_full[43:59], dead_full[length(dead_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
          Estimate       SE t.value p.value
ma1         0.8533   0.2409  3.5425  0.0012
ma2        -0.3695   0.3200 -1.1546  0.2563
ma3        -1.0757   0.2219 -4.8470  0.0000
ma4        -1.0900   0.2703 -4.0323  0.0003
ma5         0.0148   0.2351  0.0631  0.9501
ma6         0.6672   0.2069  3.2254  0.0028
constant -864.6125 362.8723 -2.3827  0.0229

sigma^2 estimated as 70172669 on 34 degrees of freedom 
 
AIC = 21.60481  AICc = 21.68759  BIC = 21.93916 
 
 

The Standard Residual Plot presents a promising outlook, showcasing good signs of stationarity with a consistent mean and variation over time. In the Autocorrelation Function (ACF) plot, the absence of significant correlation further supports the efficacy of our model, suggesting it has successfully captured the underlying patterns in the data, leaving only white noise behind. This is indicative of an excellent model fit. While the Quantile-Quantile (Q-Q) Plot largely aligns with expectations of normality, displaying satisfactory adherence, there is still some variation observed.

Diving deeper into the diagnostic checks, the Ljung-Box test yields intriguing results, with values surpassing the 0.05 threshold (5% significance level). This indicates a lack of significant autocorrelation, reinforcing the model’s adequacy. However, an analysis of the Moving Average (MA) parameters reveals a nuanced picture: while the p-values for ma2 and ma5 exceed the 0.05 mark, suggesting these terms may not contribute significantly to the model, the p-values for ma1, ma3, ma4, and ma6 fall below this threshold, indicating their importance in the model’s structure. This mixed outcome highlights areas for potential refinement and underscores the model’s overall robustness.

Code
# Full diagnostics for the ARIMA(0,1,1) model of the first hospitalization series
hos1_full <- capture.output(sarima(diff(hos_ts1),0, 1, 1))

Code
# Show only the coefficient table and information criteria from the capture
cat(hos1_full[22:33], hos1_full[length(hos1_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
           Estimate        SE t.value p.value
ma1         -0.8352    0.0997 -8.3758  0.0000
constant -2100.3772 1731.5890 -1.2130  0.2315

sigma^2 estimated as 4024330014 on 45 degrees of freedom 
 
AIC = 25.1066  AICc = 25.11241  BIC = 25.2247 
 
 

The Standard Residual Plot presents a positive indication of stationarity, characterized by a consistent mean and variance throughout. This suggests a stable model performance over time. The Autocorrelation Function (ACF) plot reinforces this assessment, showing an absence of correlation among residuals and implying that the model has effectively captured the underlying pattern, leaving only white noise behind. This is a hallmark of an excellently fitted model.

Furthermore, the Quantile-Quantile (Q-Q) Plot demonstrates commendable adherence to normality, albeit with some minor deviations. The consistency in the plot underscores the model’s reliability in normal distribution assumptions. However, the Ljung-Box test introduces a nuanced perspective, displaying values that surpass the 0.05 threshold (5% significance level). This indicates a lack of significant autocorrelation, reinforcing the model’s adeptness at fitting the data effectively, as further evidenced by a p-value below 0.05.

Code
# Full diagnostics for the ARIMA(2,1,1) model of the second hospitalization series
hos2_full <- capture.output(sarima(diff(hos_ts2),2, 1, 1))

Code
# Show only the coefficient table and information criteria from the capture
cat(hos2_full[31:44], hos2_full[length(hos2_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
          Estimate       SE  t.value p.value
ar1         0.2460   0.1276   1.9271  0.0606
ar2        -0.4570   0.1247  -3.6649  0.0007
ma1        -1.0000   0.0625 -15.9981  0.0000
constant -184.7783 189.0308  -0.9775  0.3338

sigma^2 estimated as 449339658 on 43 degrees of freedom 
 
AIC = 23.07453  AICc = 23.09479  BIC = 23.27135 
 
 

The Standard Residual Plot presents a promising picture, showcasing characteristics of good stationarity with a largely consistent mean and variance across the series. The Autocorrelation Function (ACF) plot further strengthens our confidence in the model’s robustness by displaying negligible correlation, which implies that the model has successfully captured the underlying pattern, leaving only white noise behind. This is indicative of an exceptionally well-fitted model. Meanwhile, the Quantile-Quantile (Q-Q) Plot also leans towards demonstrating commendable normality, albeit with some deviations. The Ljung-Box test results introduce a slight variance, displaying values surpassing the 0.05 threshold (at a 5% significance level), which denotes the lack of substantial correlation—another hallmark of a model that is fitting well. Although the p-value for ar1 marginally exceeds 0.05, the p-values for ar2 and ma1 are below this threshold, further underscoring the model’s efficacy and the accuracy of its fit.

Code
# Full diagnostics for the ARIMA(2,0,0) model of the differenced third
# hospitalization series (no further differencing inside sarima: d = 0)
hos3_full <- capture.output(sarima(diff(hos_ts3),2, 0, 0))

Code
# Show only the coefficient table and information criteria from the capture
cat(hos3_full[20:32], hos3_full[length(hos3_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
      Estimate     SE t.value p.value
ar1     0.3115 0.1242  2.5083  0.0158
ar2    -0.4916 0.1224 -4.0165  0.0002
xmean   0.0005 0.0028  0.1899  0.8502

sigma^2 estimated as 0.000494291 on 45 degrees of freedom 
 
AIC = -4.595389  AICc = -4.584025  BIC = -4.439455 
 
 

The Standard Residual Plot presents a promising depiction of stationarity, characterized by a largely constant mean and variance, suggesting the model’s effectiveness in capturing the data’s essence. The Autocorrelation Function (ACF) plot further strengthens this assessment, displaying negligible correlation among residuals and implying that the model residuals resemble white noise—a hallmark of an excellent model fit. Meanwhile, the Quantile-Quantile (Q-Q) Plot offers substantial evidence of normality, albeit with some variability. This is complemented by the outcomes of the Ljung-Box test, which, despite variations, predominantly reports p-values above the 0.05 mark (5% significance level). Such results underscore a lack of significant autocorrelation within the residuals, affirming the model’s robustness and precision in fitting the data.

Code
# Full diagnostics for the ARIMA(1,0,1) model of the twice-differenced
# unemployment series (second differencing was required for stationarity)
unemploy_full <- capture.output(sarima(diff(diff(unemploy_ts)),1, 0, 1))

Code
# Show only the coefficient table and information criteria from the capture
cat(unemploy_full[135:147], unemploy_full[length(unemploy_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
      Estimate     SE t.value p.value
ar1     0.0308 0.2012  0.1533  0.8796
ma1    -1.0000 0.1216 -8.2224  0.0000
xmean  -0.0005 0.0006 -0.8403  0.4098

sigma^2 estimated as 0.0004656674 on 22 degrees of freedom 
 
AIC = -4.386209  AICc = -4.340495  BIC = -4.191189 
 
 

After implementing first-order differencing, the unemployment rate time series continued to exhibit non-stationary characteristics, prompting the necessity for a second differencing step to attain stationarity. Post-differencing, the Standard Residual Plot demonstrated commendable stationarity, characterized by a mostly constant mean and variance, indicative of a well-adjusted series. The Autocorrelation Function (ACF) plot, revealing no significant correlations, suggests that the model has effectively captured the underlying patterns within the data, leaving behind what appears to be purely white noise. This is a strong indicator of an excellently fitted model.

Additionally, the Quantile-Quantile (Q-Q) Plot shows a satisfactory alignment with normality, though with some deviations. The results from the Ljung-Box test varied, presenting values surpassing the 0.05 threshold (5% significance level), which points to the absence of significant autocorrelations and underscores the model’s adequacy. The analysis of parameter significance revealed that the p-value for the autoregressive term (ar1) exceeded 0.05, suggesting it might not contribute significantly to the model, whereas the moving average term (ma1), with a p-value below 0.05, indicates a meaningful contribution. This nuanced understanding of the model’s components further attests to its robustness in capturing the dynamics of the unemployment rate time series.

Code
# Full diagnostics for the ARIMA(1,0,3) model of the differenced stock series
stock_full <- capture.output(sarima(diff(stock_ts),1, 0, 3))

Code
# Show only the coefficient table and information criteria from the capture
cat(stock_full[57:71], stock_full[length(stock_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
      Estimate     SE t.value p.value
ar1     0.7390 0.1659  4.4548  0.0001
ma1    -1.0930 0.2018 -5.4163  0.0000
ma2     0.4961 0.1968  2.5211  0.0156
ma3    -0.4032 0.1575 -2.5594  0.0142
xmean  -0.1074 0.0232 -4.6303  0.0000

sigma^2 estimated as 0.2899104 on 42 degrees of freedom 
 
AIC = 1.901971  AICc = 1.933108  BIC = 2.13816 
 
 

The Standard Residual Plot presents a promising indication of stationarity, with the mean and variance appearing mostly constant throughout. This uniformity in the residuals suggests a stable model performance over time. Meanwhile, the Autocorrelation Function (ACF) plot reveals an absence of correlation among the residuals, implying that the model has effectively captured the underlying patterns in the data, leaving behind what appears to be pure white noise. Such an outcome is indicative of an excellently fitted model.

On the other hand, the Quantile-Quantile (Q-Q) Plot showcases a decent approximation to normality, although there is some variability. This suggests that while the model’s residuals closely follow a normal distribution, there are areas of deviation worth noting.

Moreover, the results from the Ljung-Box test introduce some variability, with values surpassing the 0.05 threshold (5% significance level). This would typically indicate potential correlation; however, in this context, it signifies the absence of significant autocorrelation, further affirming the model’s adequacy. Notably, all p-values fall below the 0.05 mark, reinforcing the statistical strength and the well-fitted nature of the model.

Code
# Full diagnostics for the ARIMA(0,0,1) model of the differenced "demo" series
demo_full <- capture.output(sarima(diff(demo_ts),0, 0, 1))

Code
# Show only the coefficient table and information criteria from the capture
cat(demo_full[145:156], demo_full[length(demo_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
      Estimate     SE  t.value p.value
ma1    -0.9999 0.0732 -13.6622  0.0000
xmean   0.0002 0.0003   0.8221  0.4153

sigma^2 estimated as 0.0004178928 on 46 degrees of freedom 
 
AIC = -4.736419  AICc = -4.730863  BIC = -4.619469 
 
 

The Standard Residual Plot exhibits commendable stationarity, characterized by a consistent mean and variance throughout, indicative of a robust model performance. The Autocorrelation Function (ACF) plot further reinforces this by demonstrating an absence of correlation among residuals, thereby suggesting that the model has effectively captured the underlying data patterns, leaving behind only white noise. This is a hallmark of an excellently fitted model. Meanwhile, the Quantile-Quantile (Q-Q) Plot generally aligns with expectations of normality, although minor deviations are observed, which is typical in practical scenarios. The Ljung-Box test results introduce some variability, with certain values close to the 0.05 (5% significance) threshold. However, the predominance of p-values above this threshold underscores the model’s ability to adequately represent the data without significant autocorrelation among residuals, cementing its status as well-calibrated and fitting.

Code
# Full diagnostics for the ARIMA(0,0,1) model of the differenced "inde" series
inde_full <- capture.output(sarima(diff(inde_ts),0, 0, 1))

Code
# Show only the coefficient table and information criteria from the capture
cat(inde_full[27:38], inde_full[length(inde_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
      Estimate     SE  t.value p.value
ma1     -1e+00 0.0635 -15.7571  0.0000
xmean    3e-04 0.0004   0.7764  0.4415

sigma^2 estimated as 0.001318368 on 46 degrees of freedom 
 
AIC = -3.587405  AICc = -3.581849  BIC = -3.470454 
 
 

The Standard Residual Plot presents a commendable depiction of stationarity, with the mean and variance remaining mostly constant throughout, suggesting that the data points fluctuate around a steady level. The Autocorrelation Function (ACF) plot further reinforces the model’s efficacy by exhibiting negligible correlation among residuals, indicating that the model has successfully captured the underlying pattern in the data, leaving behind what appears to be mere white noise. This observation underscores the model’s robust fit to the data.

In the Quantile-Quantile (Q-Q) Plot, we observe a satisfactory alignment with normality, although there are minor deviations. Such variations are typical and do not significantly detract from the model’s overall performance.

However, the results from the Ljung-Box test introduce a layer of complexity, displaying values that vary around the 0.05 threshold, which typically denotes a 5% significance level. Despite these variations, the predominance of p-values greater than 0.05 throughout our analysis provides strong evidence against significant autocorrelation among residuals, further affirming the model’s aptitude in capturing the essence of the dataset without overfitting.

Collectively, these diagnostic tools paint a picture of a well-adjusted model, adept at navigating through the intricacies of the data to offer valuable insights, albeit with room for minor improvements as indicated by the Q-Q plot and Ljung-Box test variations.

Code
# Full diagnostics for the ARIMA(1,0,1) model of the differenced "rep" series
rep_full <- capture.output(sarima(diff(rep_ts),1, 0, 1))

Code
# Show only the coefficient table and information criteria from the capture
cat(rep_full[126:138], rep_full[length(rep_full)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
      Estimate     SE  t.value p.value
ar1       0.41 0.1351   3.0350  0.0040
ma1      -1.00 0.0628 -15.9210  0.0000
xmean     0.00 0.0003   0.0824  0.9347

sigma^2 estimated as 0.0003748071 on 45 degrees of freedom 
 
AIC = -4.821046  AICc = -4.809682  BIC = -4.665112 
 
 

The Standard Residual Plot presents a promising outlook, showcasing robust stationarity characterized by a largely constant mean and variance, indicative of a well-behaved model. The Autocorrelation Function (ACF) plot further corroborates this by revealing negligible correlation, implying that the residuals amount to white noise and underscoring the model’s comprehensive capture of underlying patterns—a hallmark of excellent model fit. Meanwhile, the Quantile-Quantile (Q-Q) Plot demonstrates commendable adherence to normality, albeit with minor deviations. The Ljung-Box test results introduce a nuance, exhibiting values surpassing the 0.05 threshold (at a 5% significance level), thereby negating the presence of substantial autocorrelation and endorsing the model’s aptness. Crucially, all observed p-values fall below the 0.05 mark, reinforcing the statistical soundness of our model.

7. Auto.Arima()

Code
# Automatic order selection for the differenced daily-vaccination series
auto.arima(diff(d_vacc_ts))
Series: diff(d_vacc_ts) 
ARIMA(0,0,2) with zero mean 

Coefficients:
         ma1     ma2
      0.8344  0.5402
s.e.  0.1780  0.2384

sigma^2 = 3.458e+09:  log likelihood = -359.11
AIC=724.22   AICc=725.18   BIC=728.32

The best model from the step above was ARIMA(2,1,2), while the best model Auto ARIMA gave me is ARIMA(0,0,2) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced people-vaccinated series
auto.arima(diff(p_vacc_ts))
Series: diff(p_vacc_ts) 
ARIMA(0,1,1) 

Coefficients:
         ma1
      0.4527
s.e.  0.1518

sigma^2 = 2.746:  log likelihood = -51.55
AIC=107.11   AICc=107.61   BIC=109.7

The best model from the step above was ARIMA(0,1,3), while the best model Auto ARIMA gave me is ARIMA(0,1,1). This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced people-fully-vaccinated series
auto.arima(diff(pf_vacc_ts))
Series: diff(pf_vacc_ts) 
ARIMA(0,1,2) 

Coefficients:
         ma1     ma2
      0.8498  0.5394
s.e.  0.1663  0.2047

sigma^2 = 1.679:  log likelihood = -44.79
AIC=95.59   AICc=96.63   BIC=99.47

The best model from the step above was ARIMA(0,1,3), while the best model Auto ARIMA gave me is ARIMA(0,1,2). This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced case series
auto.arima(diff(case_ts))
Series: diff(case_ts) 
ARIMA(0,0,1) with non-zero mean 

Coefficients:
         ma1     mean
      0.8182  2324827
s.e.  0.0780   554675

sigma^2 = 4.192e+12:  log likelihood = -669.47
AIC=1344.95   AICc=1345.58   BIC=1350.16

The best model from the step above was ARIMA(0,1,2), while the best model Auto ARIMA gave me is ARIMA(0,0,1) with non-zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced deaths series
auto.arima(diff(dead_ts))
Series: diff(dead_ts) 
ARIMA(0,1,1) 

Coefficients:
         ma1
      0.7989
s.e.  0.0782

sigma^2 = 138666540:  log likelihood = -442.5
AIC=889.01   AICc=889.32   BIC=892.44

The best model from the step above was ARIMA(0,1,6), while the best model Auto ARIMA gave me is ARIMA(0,1,1). This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced first hospitalization series
auto.arima(diff(hos_ts1))
Series: diff(hos_ts1) 
ARIMA(0,1,1) 

Coefficients:
          ma1
      -0.7905
s.e.   0.0944

sigma^2 = 4.238e+09:  log likelihood = -587.61
AIC=1179.22   AICc=1179.49   BIC=1182.92

The best model from the step above and from the Auto ARIMA was all ARIMA(0,1,1), which means it is the best model.

Code
# Automatic order selection for the differenced second hospitalization series
auto.arima(diff(hos_ts2))
Series: diff(hos_ts2) 
ARIMA(0,0,3) with zero mean 

Coefficients:
         ma1      ma2      ma3
      0.0996  -0.4661  -0.2804
s.e.  0.1375   0.1643   0.1693

sigma^2 = 462716317:  log likelihood = -545.8
AIC=1099.61   AICc=1100.54   BIC=1107.09

The best model from the step above was ARIMA(2,1,1), while the best model Auto ARIMA gave me is ARIMA(0,0,3) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced third hospitalization series
auto.arima(diff(hos_ts3))
Series: diff(hos_ts3) 
ARIMA(0,0,3) with zero mean 

Coefficients:
         ma1      ma2      ma3
      0.1479  -0.5111  -0.3791
s.e.  0.1330   0.1547   0.1596

sigma^2 = 0.0005008:  log likelihood = 115.25
AIC=-222.51   AICc=-221.58   BIC=-215.02

The best model from the step above was ARIMA(2,0,0), while the best model Auto ARIMA gave me is ARIMA(0,0,3) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the twice-differenced unemployment series
auto.arima(diff(diff(unemploy_ts)))
Series: diff(diff(unemploy_ts)) 
ARIMA(0,0,0) with zero mean 

sigma^2 = 0.000934:  log likelihood = 51.73
AIC=-101.45   AICc=-101.28   BIC=-100.23

The best model from the step above was ARIMA(1,0,1), while the best model Auto ARIMA gave me is ARIMA(0,0,0) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced stock series
auto.arima(diff(stock_ts))
Series: diff(stock_ts) 
ARIMA(2,0,0) with zero mean 

Coefficients:
          ar1     ar2
      -0.2074  0.3307
s.e.   0.1500  0.1513

sigma^2 = 0.3424:  log likelihood = -40.65
AIC=87.29   AICc=87.85   BIC=92.84

The best model from the step above was ARIMA(1,0,3), while the best model Auto ARIMA gave me is ARIMA(2,0,0) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced "demo" series
auto.arima(diff(demo_ts))
Series: diff(demo_ts) 
ARIMA(0,0,1) with zero mean 

Coefficients:
          ma1
      -0.9552
s.e.   0.1012

sigma^2 = 0.0004485:  log likelihood = 116.21
AIC=-228.43   AICc=-228.16   BIC=-224.69

The best model from the step above and from the Auto ARIMA was all ARIMA(0,0,1), which means it is the best model.

Code
# Automatic order selection for the differenced "inde" series
auto.arima(diff(inde_ts))
Series: diff(inde_ts) 
ARIMA(1,0,0) with zero mean 

Coefficients:
          ar1
      -0.5722
s.e.   0.1176

sigma^2 = 0.001697:  log likelihood = 85.29
AIC=-166.58   AICc=-166.31   BIC=-162.83

The best model from the step above was ARIMA(0,0,1), while the best model Auto ARIMA gave me is ARIMA(1,0,0) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Code
# Automatic order selection for the differenced "rep" series
auto.arima(diff(rep_ts))
Series: diff(rep_ts) 
ARIMA(0,0,1) with zero mean 

Coefficients:
          ma1
      -0.8013
s.e.   0.1445

sigma^2 = 0.0004739:  log likelihood = 115.59
AIC=-227.18   AICc=-226.92   BIC=-223.44

The best model from the step above was ARIMA(1,0,1), while the best model Auto ARIMA gave me is ARIMA(0,0,1) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.

Auto ARIMA may not always serve as the most dependable model for forecasting, for several reasons. Firstly, its reliance on predefined criteria for model selection can sometimes overlook subtle nuances within the data, which might be crucial for accurate predictions. Additionally, the automated nature of Auto ARIMA increases the risk of overfitting or selecting a model that is less than optimal. Thus, although Auto ARIMA is an incredibly potent analytical tool, it is essential to approach its projected results with caution and not rely on them unquestioningly.

8. Forecasting

Code
# 50-step-ahead forecasts from each fitted ARIMA model, plotted with autoplot().
# NOTE(review): the same variable name `pred_techmu` is reused for every series,
# so only the last forecast object survives after this section runs; consider
# distinct names if the forecasts are needed later.
pred_techmu=forecast(fit_d_vacc_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_p_vacc_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_pf_vacc_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_case_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_dead_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_hos1_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_hos2_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_hos3_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_unemploy_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_stock_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_demo_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_inde_AR,50)
autoplot(pred_techmu) + theme_bw() 

Code
pred_techmu=forecast(fit_rep_AR,50)
autoplot(pred_techmu) + theme_bw() 

In the presented forecast graphs, the predictive trajectory is depicted by a blue line, surrounded by prediction bands in two shades of purple. The darker, inner band is the 80% prediction interval, while the lighter, outer band is the wider 95% interval (the forecast package’s default levels). Notably, as the forecast extends into the future, the bands expand, signifying a widening interval. This expansion reflects an increase in forecast uncertainty—the further we project into the future, the more variable and less certain the predictions become. This pattern is consistently observed across all plots, underscoring the inherent challenge of forecasting over extended periods.

9. ARIMA vs. Benchmarks

Code
# Fit the selected ARIMA(2,1,2) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(d_vacc_ts, order = c(2, 1, 2), include.drift = FALSE)
autoplot(d_vacc_ts) +
  autolayer(meanf(d_vacc_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(d_vacc_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(d_vacc_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Daily Vaccinations") +
  guides(colour = guide_legend(title = "Forecast"))

The plot showcases a comparison of different forecasting methods for daily vaccination numbers. The black line represents the actual historical data for daily vaccinations, displaying a sharp peak in early 2021 and another in early 2022, followed by a decline. The forecasts from ARIMA, Drift, Mean, and Naïve models are depicted as flat lines beyond the historical data, indicating their prediction for future values. The ARIMA forecast appears slightly above zero, suggesting minimal change in future vaccination numbers, which could imply a matured vaccination campaign. Drift and Mean models predict a very slight downward and upward trend, respectively, while the Naïve model, often used as a baseline comparison, suggests no change, simply carrying the last observed data point forward. The stability in these predictions may reflect an anticipation that vaccination rates will level off, having addressed the immediate demand.

Code
# Fit the selected ARIMA(0,1,3) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(p_vacc_ts, order = c(0, 1, 3), include.drift = FALSE)
autoplot(p_vacc_ts) +
  autolayer(meanf(p_vacc_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(p_vacc_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(p_vacc_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with People Vaccinated") +
  guides(colour = guide_legend(title = "Forecast"))

The plot is a visual representation of the comparison between different forecasting methods for the number of people vaccinated over time. The historical data is shown by the black line, indicating the growth trend of vaccinations until the current period. The projections made by the ARIMA, Drift, Mean, and Naïve methods are depicted as flat lines extending from the last historical point into the future (2023 and beyond).

The ARIMA model predicts a slight increase in vaccination numbers, while the Drift method suggests a more optimistic steady rise. The Mean model forecasts a constant rate, and the Naïve method, which carries the last observed value forward, also indicates no change. It is clear that these models have varying degrees of optimism regarding the future trend of vaccination numbers, with ARIMA and Drift expecting growth, while Mean and Naïve forecasts imply stabilization.

Code
# Fit the selected ARIMA(0,1,3) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(pf_vacc_ts, order = c(0, 1, 3), include.drift = FALSE)
autoplot(pf_vacc_ts) +
  autolayer(meanf(pf_vacc_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(pf_vacc_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(pf_vacc_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with People Fully Vaccinated") +
  guides(colour = guide_legend(title = "Forecast"))

This plot compares the forecasts of fully vaccinated individuals using several time series models. The black line represents the actual number of people fully vaccinated over time, showing an initial steep increase that plateaus as it moves into 2022. Predictions from the ARIMA, Drift, Mean, and Naïve models extend from the last data point. The ARIMA model projects a continued but slowing increase in fully vaccinated people, while the Drift model shows a steeper increase, suggesting higher future vaccination rates. The Mean model predicts a flat trend, indicating no significant change moving forward, and the Naïve model simply extends the last known value into the future, suggesting a static forecast. The models reflect different assumptions about the continuation of vaccination efforts and possible changes in public health policy or vaccine uptake.

Code
# Fit the selected ARIMA(0,1,2) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(case_ts, order = c(0, 1, 2), include.drift = FALSE)
autoplot(case_ts) +
  autolayer(meanf(case_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(case_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(case_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Newly Confirmed Case") +
  guides(colour = guide_legend(title = "Forecast"))

The graph presents a forecast comparison for newly confirmed COVID-19 cases using various time series models. The historical data, illustrated by the black line, shows an increasing trend through 2020 and 2021, with a plateau into 2022. The forecast models—ARIMA, Drift, Mean, and Naïve—are indicated by different colored lines beyond the last historical data point. The ARIMA model predicts a steady upward trend, suggesting an increase in cases. In contrast, the Drift model shows a flat forecast, indicating little change. The Mean model also forecasts a constant trend, and the Naïve model projects a continuation of the last observed value. This graphical representation provides an outlook on potential future case trends based on different modeling approaches.

Code
# Fit the selected ARIMA(0,1,6) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(dead_ts, order = c(0, 1, 6), include.drift = FALSE)
autoplot(dead_ts) +
  autolayer(meanf(dead_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(dead_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(dead_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Death Case") +
  guides(colour = guide_legend(title = "Forecast"))

The plot illustrates a forecast comparison for COVID-19 death cases using various time series models. The black line represents the historical number of deaths, increasing initially and then plateauing into 2022. Forecasts by ARIMA, Drift, Mean, and Naïve models are shown as colored lines projecting beyond the last data point. The ARIMA model predicts an upward trend, possibly anticipating a rise in death cases. The Drift model’s forecast remains constant, while the Mean model suggests slight growth. The Naïve model extends the last value into the future, implying no immediate change. This analysis might help in understanding and preparing for potential future scenarios in public health planning.

Code
# Fit the selected ARIMA(0,1,1) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(hos_ts1, order = c(0, 1, 1), include.drift = FALSE)
autoplot(hos_ts1) +
  autolayer(meanf(hos_ts1, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts1, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts1, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Inpatient Bed Number") +
  guides(colour = guide_legend(title = "Forecast"))

The plot compares different time series forecasting methods for the number of inpatient hospital beds occupied over time. The black line indicates the actual historical data, which shows a rapid increase at the beginning, stabilizing as it moves into the latter part of 2021 and remains relatively flat into 2022. Forecasts by the ARIMA, Drift, Mean, and Naïve methods are represented as colored lines extending from the end of the actual data into future years. The ARIMA model shows an optimistic continuous increase in bed occupancy, while the Drift model forecasts a flat trend. The Mean model predicts a very slight increase, suggesting stability, and the Naïve model extends the last known value, implying no expected change. Each model’s prediction reflects different assumptions and interpretations of the historical data’s underlying patterns.

Code
# Fit the selected ARIMA(2,1,1) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(hos_ts2, order = c(2, 1, 1), include.drift = FALSE)
autoplot(hos_ts2) +
  autolayer(meanf(hos_ts2, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts2, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts2, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Inpatient Bed Number Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))

The plot illustrates a forecast comparison using different models for the number of inpatient hospital beds utilized for COVID-19 patients. The actual historical data, represented by the black line, shows several spikes, indicating surges in hospital bed usage at different times, presumably correlating with waves of the pandemic. Moving into the future, forecasts from ARIMA, Drift, Mean, and Naïve methods show diverging trends. ARIMA expects an increase, while Drift indicates a flat future trend. Mean and Naïve models suggest a slight increase and no change, respectively. The models likely reflect different assumptions about pandemic progression and healthcare needs.

Code
# Fit the selected ARIMA(2,0,0) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(hos_ts3, order = c(2, 0, 0), include.drift = FALSE)
autoplot(hos_ts3) +
  autolayer(meanf(hos_ts3, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts3, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts3, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Utilization Rate for Inpatient Bed Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))

This plot appears to be a time series forecast comparing different methods (ARIMA, Drift, Mean, Naive) for predicting the utilization rate of inpatient beds used for COVID. The historical data shows significant fluctuations, which could correspond to various waves or surges in COVID cases. The forecast section shows that while some methods predict stability or a decline, the ARIMA model suggests a potential increase in bed utilization, which might anticipate a rise in cases or a change in hospitalization rates. The other methods seem to predict a relatively flat or stable future trend.

Code
# Fit the selected ARIMA(1,0,1) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(unemploy_ts, order = c(1, 0, 1), include.drift = FALSE)
autoplot(unemploy_ts) +
  autolayer(meanf(unemploy_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(unemploy_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(unemploy_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Unemployment Rate") +
  guides(colour = guide_legend(title = "Forecast"))

This plot appears to be a graphical representation comparing different forecasting methods for unemployment rates over time. The black line represents historical data on unemployment rates. The colored lines, which represent forecasts from various methods such as ARIMA, Drift, Mean, and Naïve, start from where the historical data ends. The plot shows the unemployment rate sharply increasing in 2020, likely due to the COVID-19 pandemic, then gradually decreasing over time, indicating recovery. The ARIMA model forecast seems to indicate a slight increase in unemployment in the future, while the Drift, Mean, and Naïve forecasts suggest a stable or decreasing trend.

Code
# Fit the selected ARIMA(1,0,3) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(stock_ts, order = c(1, 0, 3), include.drift = FALSE)
autoplot(stock_ts) +
  autolayer(meanf(stock_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(stock_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(stock_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Pfizer Stock Price") +
  guides(colour = guide_legend(title = "Forecast"))

The plot shows the historical stock price for Pfizer, represented by the black line, along with forecasts from different models: ARIMA, Drift, Mean, and Naïve. The colored horizontal lines indicate the forecasted stock price level according to each model from the present to 2024. The ARIMA model forecasts a slight decline, while the Drift and Mean models predict a stabilization of the stock price. The Naïve forecast suggests a more significant decline. This visualization is used to compare how different statistical methods anticipate the stock price trend based on historical data.

Code
# Fit the selected ARIMA(0,0,1) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(demo_ts, order = c(0, 0, 1), include.drift = FALSE)
autoplot(demo_ts) +
  autolayer(meanf(demo_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(demo_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(demo_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Democratic") +
  guides(colour = guide_legend(title = "Forecast"))

The plot compares the performance of various forecasting methods—ARIMA, Drift, Mean, and Naïve—on the support rate for the Democratic party over time. The solid black line represents the actual historical support rate, and the horizontal colored lines project future predictions according to each method. The ARIMA forecast line suggests changes over time, while the Drift, Mean, and Naïve methods predict a roughly constant future value, each based on different statistical assumptions. This visualization helps to evaluate the different approaches to forecasting and their potential accuracy in predicting future support levels.

Code
# Fit the selected ARIMA(0,0,1) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(inde_ts, order = c(0, 0, 1), include.drift = FALSE)
autoplot(inde_ts) +
  autolayer(meanf(inde_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(inde_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(inde_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Independent") +
  guides(colour = guide_legend(title = "Forecast"))

This plot depicts a time series analysis using benchmark methods—ARIMA, Drift, Mean, and Naïve—to forecast the support rate for Independents. The actual historical data is shown by the solid black line. The forecast lines for each method start from where the actual data ends and project into the future, displaying the predicted values according to each method.

ARIMA is showing a distinct upward or downward trend, suggesting a specific model-based prediction. The Drift method appears to forecast a linear trend that picks up from the last observed point. The Mean forecast suggests that future values will hover around the historical average, while the Naïve method seems to project that future values will remain constant at the last observed value. Each method offers a different perspective on future expectations based on past data.

Code
# Fit the selected ARIMA(1,0,1) and compare it with benchmark forecasts
# (Mean, Naïve, Drift) over a 10-step horizon.
fit_techmu_bench <- Arima(rep_ts, order = c(1, 0, 1), include.drift = FALSE)
autoplot(rep_ts) +
  autolayer(meanf(rep_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(rep_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(rep_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Bug fix: forecast from the fitted ARIMA model; the original called
  # forecast() on the raw series, so fit_techmu_bench was never used.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Republican") +
  guides(colour = guide_legend(title = "Forecast"))

The plot compares benchmark forecasting methods for the Republican support rate. The black line shows the historical support rate, while the ARIMA, Drift, Mean, and Naïve forecasts extend from the last observed point into the future. As with the other political support series, the forecasts are relatively flat, reflecting the stability of the underlying polling data and the differing statistical assumptions of each method.

10. SARIMA ACF & PACF Plots

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(d_vacc_ts, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1,2 D: 1 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(p_vacc_ts, lag = 12))

You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1,2 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(pf_vacc_ts, lag = 12))

You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1,2 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(case_ts, lag = 12))

You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1,2,3,4 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(dead_ts, lag = 12))

You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1,2,3,4,5 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(hos_ts1, lag = 12))

You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1,2 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(hos_ts2, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(hos_ts3, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1,2 D: 1 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(unemploy_ts, lag = 12))

You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(stock_ts, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1,2 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(demo_ts, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(inde_ts, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1 Q: 1

Code
# Seasonal difference at lag 12, then show series + ACF/PACF
ggtsdisplay(diff(rep_ts, lag = 12))

You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:

P: 1 D: 1 Q: 1

11. SARIMA(p,d,q)

I will run different combinations of SARIMA orders: p = 0–4, q = 0–2, P = 0–1, Q = 0–1, with d = 1 and D = 1 fixed (seasonal period s = 12).

Code
#' Grid-search SARIMA(p,d,q)(P,D,Q)[12] models and collect AIC/BIC/AICc.
#'
#' The bounds p1..Q2 are 1-based loop limits; the fitted orders are the
#' loop indices minus one (so p1 = 1, p2 = 5 fits p = 0..4). d = D = 1
#' and s = 12 are fixed, matching the seasonal differencing used above.
#' Returns a data frame with one row per fitted model.
SARIMA.c <- function(p1, p2, q1, q2, P1, P2, Q1, Q2, data) {
  d <- 1
  D <- 1
  s <- 12

  # Preallocate one row per candidate; the original hard-coded 30 rows,
  # which overflows for grids larger than 30 combinations (this grid can
  # reach (p2-p1+1)*(q2-q1+1)*(P2-P1+1)*(Q2-Q1+1) = 60). Also avoid
  # naming the result `ls`, which shadows base::ls.
  n_max <- (p2 - p1 + 1) * (q2 - q1 + 1) * (P2 - P1 + 1) * (Q2 - Q1 + 1)
  results <- matrix(NA_real_, nrow = n_max, ncol = 9)
  i <- 1

  for (p in p1:p2) {
    for (q in q1:q2) {
      for (P in P1:P2) {
        for (Q in Q1:Q2) {
          # Cap the total order to keep the candidate models parsimonious.
          if (p + d + q + P + D + Q <= 9) {
            model <- Arima(data,
                           order = c(p - 1, d, q - 1),
                           seasonal = c(P - 1, D, Q - 1))
            results[i, ] <- c(p - 1, d, q - 1, P - 1, D, Q - 1,
                              model$aic, model$bic, model$aicc)
            i <- i + 1
          }
        }
      }
    }
  }

  # Keep only the rows actually filled (drops the trailing all-NA rows
  # the original version returned).
  fitted <- as.data.frame(results[seq_len(i - 1), , drop = FALSE])
  names(fitted) <- c("p", "d", "q", "P", "D", "Q", "AIC", "BIC", "AICc")
  fitted
}

output <- SARIMA.c(p1 = 1, p2 = 5, q1 = 1, q2 = 3, P1 = 1, P2 = 2,
                   Q1 = 1, Q2 = 2, data = diff(diff(d_vacc_ts)))

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 398.4760 399.1840 398.7837
0 1 0 0 1 1 400.2716 401.6877 401.2716
0 1 0 1 1 0 400.2716 401.6877 401.2716
0 1 0 1 1 1 402.2716 404.3957 404.4534
0 1 1 0 1 0 394.5558 395.9719 395.5558
0 1 1 0 1 1 396.5371 398.6613 398.7189
0 1 1 1 1 0 396.5371 398.6613 398.7189
0 1 1 1 1 1 398.5371 401.3693 402.5371
0 1 2 0 1 0 396.1091 398.2333 398.2910
0 1 2 0 1 1 398.1065 400.9387 402.1065
0 1 2 1 1 0 398.1065 400.9387 402.1065
1 1 0 0 1 0 399.1057 400.5218 400.1057
1 1 0 0 1 1 401.0156 403.1398 403.1974
1 1 0 1 1 0 401.0156 403.1398 403.1974
1 1 0 1 1 1 403.0156 405.8478 407.0156
1 1 1 0 1 0 396.2122 398.3364 398.3940
1 1 1 0 1 1 398.2114 401.0436 402.2114
1 1 1 1 1 0 398.2114 401.0436 402.2114
1 1 2 0 1 0 398.1029 400.9351 402.1029
2 1 0 0 1 0 400.3463 402.4704 402.5281
2 1 0 0 1 1 402.2578 405.0900 406.2578
2 1 0 1 1 0 402.2578 405.0900 406.2578
2 1 1 0 1 0 397.6840 400.5162 401.6840
3 1 0 0 1 0 397.1631 399.9953 401.1631
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
output[which.min(output$AIC),] # candidate model with the lowest AIC
  p d q P D Q      AIC      BIC     AICc
5 0 1 1 0 1 0 394.5558 395.9719 395.5558
Code
output[which.min(output$BIC),] # candidate model with the lowest BIC
  p d q P D Q      AIC      BIC     AICc
5 0 1 1 0 1 0 394.5558 395.9719 395.5558
Code
output[which.min(output$AICc),] # candidate model with the lowest AICc
  p d q P D Q      AIC      BIC     AICc
5 0 1 1 0 1 0 394.5558 395.9719 395.5558

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,1)x(0,1,0)12.

Code
# Grid-search SARIMA orders for people vaccinated per hundred
output <- SARIMA.c(p1 = 1, p2 = 4, q1 = 1, q2 = 4,
                   P1 = 1, P2 = 2, Q1 = 1, Q2 = 2, data = p_vacc_ts)

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 100.48114 101.25372 100.76685
0 1 0 0 1 1 102.38215 103.92733 103.30523
0 1 0 1 1 0 102.38215 103.92733 103.30523
0 1 0 1 1 1 104.38215 106.69991 106.38215
0 1 1 0 1 0 88.52817 90.07335 89.45125
0 1 1 0 1 1 90.46696 92.78473 92.46696
0 1 1 1 1 0 90.46696 92.78473 92.46696
0 1 1 1 1 1 92.46696 95.55732 96.10333
0 1 2 0 1 0 78.02611 80.34388 80.02611
0 1 2 0 1 1 79.96905 83.05941 83.60542
0 1 2 1 1 0 79.96906 83.05941 83.60542
0 1 3 0 1 0 76.93242 80.02277 80.56878
1 1 0 0 1 0 79.14012 80.68530 80.06320
1 1 0 0 1 1 81.11958 83.43735 83.11958
1 1 0 1 1 0 81.11953 83.43730 83.11953
1 1 0 1 1 1 83.11956 86.20991 86.75592
1 1 1 0 1 0 78.34921 80.66698 80.34921
1 1 1 0 1 1 80.34859 83.43894 83.98495
1 1 1 1 1 0 80.34858 83.43894 83.98495
1 1 2 0 1 0 76.30351 79.39387 79.93988
2 1 0 0 1 0 78.18141 80.49917 80.18141
2 1 0 0 1 1 80.18140 83.27176 83.81777
2 1 0 1 1 0 80.18140 83.27176 83.81777
2 1 1 0 1 0 79.84174 82.93210 83.47811
3 1 0 0 1 0 79.07736 82.16771 82.71372
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
output[which.min(output$AIC),] # candidate model with the lowest AIC
   p d q P D Q      AIC      BIC     AICc
20 1 1 2 0 1 0 76.30351 79.39387 79.93988
Code
output[which.min(output$BIC),] # candidate model with the lowest BIC
   p d q P D Q      AIC      BIC     AICc
20 1 1 2 0 1 0 76.30351 79.39387 79.93988
Code
output[which.min(output$AICc),] # candidate model with the lowest AICc
   p d q P D Q      AIC      BIC     AICc
20 1 1 2 0 1 0 76.30351 79.39387 79.93988

The model with the lowest AIC, BIC and AICc is ARIMA(1,1,2)x(0,1,0)12.

Code
# Grid-search SARIMA orders for people fully vaccinated per hundred
output <- SARIMA.c(p1 = 1, p2 = 4, q1 = 1, q2 = 4,
                   P1 = 1, P2 = 2, Q1 = 1, Q2 = 2, data = pf_vacc_ts)

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 98.00877 98.78136 98.29449
0 1 0 0 1 1 99.89367 101.43885 100.81675
0 1 0 1 1 0 99.89367 101.43885 100.81675
0 1 0 1 1 1 101.89367 104.21144 103.89367
0 1 1 0 1 0 82.78603 84.33121 83.70911
0 1 1 0 1 1 84.72002 87.03778 86.72002
0 1 1 1 1 0 84.72002 87.03778 86.72002
0 1 1 1 1 1 86.72002 89.81037 90.35638
0 1 2 0 1 0 73.76322 76.08098 75.76322
0 1 2 0 1 1 75.63203 78.72239 79.26840
0 1 2 1 1 0 75.63204 78.72240 79.26840
0 1 3 0 1 0 69.84105 72.93140 73.47741
1 1 0 0 1 0 75.13328 76.67846 76.05635
1 1 0 0 1 1 77.12142 79.43919 79.12142
1 1 0 1 1 0 77.12142 79.43919 79.12142
1 1 0 1 1 1 79.12142 82.21178 82.75779
1 1 1 0 1 0 70.10211 72.41988 72.10211
1 1 1 0 1 1 72.10029 75.19064 75.73665
1 1 1 1 1 0 72.10029 75.19065 75.73665
1 1 2 0 1 0 68.96748 72.05784 72.60385
2 1 0 0 1 0 69.08627 71.40404 71.08627
2 1 0 0 1 1 71.07520 74.16556 74.71156
2 1 0 1 1 0 71.07456 74.16491 74.71092
2 1 1 0 1 0 69.61066 72.70102 73.24703
3 1 0 0 1 0 68.91017 72.00053 72.54654
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
output[which.min(output$AIC),] # candidate model with the lowest AIC
   p d q P D Q      AIC      BIC     AICc
25 3 1 0 0 1 0 68.91017 72.00053 72.54654
Code
output[which.min(output$BIC),] # candidate model with the lowest BIC
   p d q P D Q      AIC      BIC     AICc
21 2 1 0 0 1 0 69.08627 71.40404 71.08627
Code
output[which.min(output$AICc),] # candidate model with the lowest AICc
   p d q P D Q      AIC      BIC     AICc
21 2 1 0 0 1 0 69.08627 71.40404 71.08627

The model with the lowest AIC, BIC and AICc is ARIMA(2,1,0)x(0,1,0)12.

Due to the presence of non-stationary seasonality in this time series data, we have opted to discontinue its use. Non-stationary seasonality implies that the patterns and trends within the data exhibit variations over time, without displaying a consistent and predictable behavior. As a result, attempting to model or analyze such data may lead to unreliable or inaccurate outcomes. Hence, to ensure the robustness and validity of our analyses, we have decided to cease utilizing this particular time series.

Code
# Grid-search SARIMA orders for the death-case series
output <- SARIMA.c(p1 = 1, p2 = 4, q1 = 1, q2 = 4,
                   P1 = 1, P2 = 2, Q1 = 1, Q2 = 2, data = dead_ts)

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 706.0650 707.4662 706.2078
0 1 0 0 1 1 707.8100 710.6124 708.2544
0 1 0 1 1 0 708.0441 710.8465 708.4886
0 1 0 1 1 1 708.3570 712.5606 709.2801
0 1 1 0 1 0 680.9710 683.7734 681.4154
0 1 1 0 1 1 680.6631 684.8667 681.5862
0 1 1 1 1 0 682.6574 686.8610 683.5805
0 1 1 1 1 1 682.0388 687.6436 683.6388
0 1 2 0 1 0 667.8860 672.0896 668.8091
0 1 2 0 1 1 663.0587 668.6635 664.6587
0 1 2 1 1 0 666.1448 671.7496 667.7448
0 1 3 0 1 0 669.6639 675.2687 671.2639
1 1 0 0 1 0 684.1800 686.9824 684.6244
1 1 0 0 1 1 681.6694 685.8730 682.5925
1 1 0 1 1 0 684.9236 689.1272 685.8467
1 1 0 1 1 1 683.5516 689.1564 685.1516
1 1 1 0 1 0 668.1578 672.3614 669.0809
1 1 1 0 1 1 663.7715 669.3763 665.3715
1 1 1 1 1 0 666.1812 671.7860 667.7812
1 1 2 0 1 0 668.5934 674.1982 670.1934
2 1 0 0 1 0 673.8361 678.0397 674.7592
2 1 0 0 1 1 672.1399 677.7447 673.7399
2 1 0 1 1 0 673.8880 679.4928 675.4880
2 1 1 0 1 0 668.2286 673.8334 669.8286
3 1 0 0 1 0 670.8526 676.4574 672.4526
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
output[which.min(output$AIC),] # candidate model with the lowest AIC
   p d q P D Q      AIC      BIC     AICc
10 0 1 2 0 1 1 663.0587 668.6635 664.6587
Code
output[which.min(output$BIC),] # candidate model with the lowest BIC
   p d q P D Q      AIC      BIC     AICc
10 0 1 2 0 1 1 663.0587 668.6635 664.6587
Code
output[which.min(output$AICc),] # candidate model with the lowest AICc
   p d q P D Q      AIC      BIC     AICc
10 0 1 2 0 1 1 663.0587 668.6635 664.6587

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,2)x(0,1,1)12.

Code
# Grid-search SARIMA orders for the (first-differenced) inpatient-bed series
output <- SARIMA.c(p1 = 1, p2 = 4, q1 = 1, q2 = 4,
                   P1 = 1, P2 = 2, Q1 = 1, Q2 = 2, data = diff(hos_ts1))

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 898.3779 899.9333 898.4992
0 1 0 0 1 1 900.3664 903.4771 900.7414
0 1 0 1 1 0 900.3668 903.4775 900.7418
0 1 0 1 1 1 902.3740 907.0400 903.1481
0 1 1 0 1 0 889.3236 892.4343 889.6986
0 1 1 0 1 1 891.2997 895.9657 892.0739
0 1 1 1 1 0 891.3006 895.9667 892.0748
0 1 1 1 1 1 893.3158 899.5372 894.6491
0 1 2 0 1 0 890.2569 894.9229 891.0311
0 1 2 0 1 1 892.2341 898.4554 893.5674
0 1 2 1 1 0 892.2345 898.4559 893.5678
0 1 3 0 1 0 887.8623 894.0837 889.1957
1 1 0 0 1 0 896.5763 899.6870 896.9513
1 1 0 0 1 1 898.5269 903.1929 899.3010
1 1 0 1 1 0 898.5321 903.1982 899.3063
1 1 0 1 1 1 900.5115 906.7329 901.8448
1 1 1 0 1 0 890.3868 895.0528 891.1610
1 1 1 0 1 1 892.3537 898.5751 893.6871
1 1 1 1 1 0 892.3548 898.5762 893.6881
1 1 2 0 1 0 892.2498 898.4711 893.5831
2 1 0 0 1 0 896.7770 901.4431 897.5512
2 1 0 0 1 1 898.7687 904.9901 900.1020
2 1 0 1 1 0 898.7689 904.9903 900.1023
2 1 1 0 1 0 891.5259 897.7473 892.8592
3 1 0 0 1 0 874.9692 881.1906 876.3025
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
output[which.min(output$AIC),] # candidate model with the lowest AIC
   p d q P D Q      AIC      BIC     AICc
25 3 1 0 0 1 0 874.9692 881.1906 876.3025
Code
output[which.min(output$BIC),] # candidate model with the lowest BIC
   p d q P D Q      AIC      BIC     AICc
25 3 1 0 0 1 0 874.9692 881.1906 876.3025
Code
output[which.min(output$AICc),] # candidate model with the lowest AICc
   p d q P D Q      AIC      BIC     AICc
25 3 1 0 0 1 0 874.9692 881.1906 876.3025

The model with the lowest AIC, BIC and AICc is ARIMA(3,1,0)x(0,1,0)12.

Code
# Grid-search SARIMA orders for the (first-differenced) COVID-bed series
output <- SARIMA.c(p1 = 1, p2 = 4, q1 = 1, q2 = 4,
                   P1 = 1, P2 = 2, Q1 = 1, Q2 = 2, data = diff(hos_ts2))

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 845.9925 847.5478 846.1137
0 1 0 0 1 1 832.8488 835.9595 833.2238
0 1 0 1 1 0 831.0523 834.1630 831.4273
0 1 0 1 1 1 831.1107 835.7767 831.8849
0 1 1 0 1 0 833.9298 837.0405 834.3048
0 1 1 0 1 1 823.9534 828.6195 824.7276
0 1 1 1 1 0 828.7642 833.4302 829.5384
0 1 1 1 1 1 825.2345 831.4559 826.5678
0 1 2 0 1 0 833.4111 838.0771 834.1853
0 1 2 0 1 1 822.2415 828.4629 823.5748
0 1 2 1 1 0 824.6883 830.9097 826.0216
0 1 3 0 1 0 830.4527 836.6741 831.7860
1 1 0 0 1 0 846.8351 849.9458 847.2101
1 1 0 0 1 1 833.9114 838.5775 834.6856
1 1 0 1 1 0 832.9176 837.5837 833.6918
1 1 0 1 1 1 832.6582 838.8796 833.9915
1 1 1 0 1 0 834.8774 839.5434 835.6515
1 1 1 0 1 1 824.1566 830.3780 825.4899
1 1 1 1 1 0 827.2692 833.4906 828.6025
1 1 2 0 1 0 833.6106 839.8320 834.9440
2 1 0 0 1 0 842.0853 846.7514 842.8595
2 1 0 0 1 1 829.0510 835.2724 830.3844
2 1 0 1 1 0 829.3747 835.5961 830.7080
2 1 1 0 1 0 830.6702 836.8916 832.0036
3 1 0 0 1 0 835.3499 841.5713 836.6832
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
output[which.min(output$AIC),] # candidate model with the lowest AIC
   p d q P D Q      AIC      BIC     AICc
10 0 1 2 0 1 1 822.2415 828.4629 823.5748
Code
output[which.min(output$BIC),] # candidate model with the lowest BIC
   p d q P D Q      AIC      BIC     AICc
10 0 1 2 0 1 1 822.2415 828.4629 823.5748
Code
output[which.min(output$AICc),] # candidate model with the lowest AICc
   p d q P D Q      AIC      BIC     AICc
10 0 1 2 0 1 1 822.2415 828.4629 823.5748

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,2)x(0,1,1)12.

Code
# Grid-search SARIMA orders for the (first-differenced) bed-utilization series
output <- SARIMA.c(p1 = 1, p2 = 4, q1 = 1, q2 = 4,
                   P1 = 1, P2 = 2, Q1 = 1, Q2 = 2, data = diff(hos_ts3))

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 -114.0213 -112.4660 -113.9001
0 1 0 0 1 1 -126.5420 -123.4313 -126.1670
0 1 0 1 1 0 -128.0841 -124.9734 -127.7091
0 1 0 1 1 1 -127.8453 -123.1793 -127.0711
0 1 1 0 1 0 -124.8268 -121.7162 -124.4518
0 1 1 0 1 1 -133.5901 -128.9241 -132.8159
0 1 1 1 1 0 -129.0858 -124.4198 -128.3116
0 1 1 1 1 1 -132.1038 -125.8824 -130.7705
0 1 2 0 1 0 -127.2883 -122.6222 -126.5141
0 1 2 0 1 1 -138.2548 -132.0334 -136.9214
0 1 2 1 1 0 -136.5076 -130.2863 -135.1743
0 1 3 0 1 0 -129.4209 -123.1995 -128.0876
1 1 0 0 1 0 -112.5920 -109.4813 -112.2170
1 1 0 0 1 1 -124.8998 -120.2337 -124.1256
1 1 0 1 1 0 -126.0867 -121.4207 -125.3125
1 1 0 1 1 1 -125.9527 -119.7313 -124.6193
1 1 1 0 1 0 -124.3921 -119.7261 -123.6179
1 1 1 0 1 1 -134.3442 -128.1228 -133.0108
1 1 1 1 1 0 -127.0859 -120.8645 -125.7526
1 1 2 0 1 0 -127.0127 -120.7913 -125.6794
2 1 0 0 1 0 -120.4829 -115.8168 -119.7087
2 1 0 0 1 1 -133.3714 -127.1500 -132.0381
2 1 0 1 1 0 -133.1602 -126.9388 -131.8269
2 1 1 0 1 0 -131.4478 -125.2264 -130.1145
3 1 0 0 1 0 -126.1666 -119.9452 -124.8333
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
# Row of the order-search table with the smallest AIC
output[which.min(output$AIC),]
   p d q P D Q       AIC       BIC      AICc
10 0 1 2 0 1 1 -138.2548 -132.0334 -136.9214
Code
# Row of the order-search table with the smallest BIC
output[which.min(output$BIC),]
   p d q P D Q       AIC       BIC      AICc
10 0 1 2 0 1 1 -138.2548 -132.0334 -136.9214
Code
# Row of the order-search table with the smallest AICc
output[which.min(output$AICc),] 
   p d q P D Q       AIC       BIC      AICc
10 0 1 2 0 1 1 -138.2548 -132.0334 -136.9214

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,2)x(0,1,1)[12].

Due to the presence of non-stationary seasonality in this time series data, we have opted to discontinue its use. Non-stationary seasonality implies that the patterns and trends within the data exhibit variations over time, without displaying a consistent and predictable behavior. As a result, attempting to model or analyze such data may lead to unreliable or inaccurate outcomes. Hence, to ensure the robustness and validity of our analyses, we have decided to cease utilizing this particular time series.

Code
# Grid-search SARIMA candidate orders on the stock series; the table below
# shows p,q in 0-3 and P,Q in 0-1 with AIC/BIC/AICc for each fit.
output=SARIMA.c(p1=1,p2=4,q1=1,q2=4,P1=1,P2=2,Q1=1,Q2=2,data=stock_ts)

# Render the search results as a table
knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 80.86001 82.41536 80.98122
0 1 0 0 1 1 81.55934 84.67004 81.93434
0 1 0 1 1 0 81.99495 85.10564 82.36995
0 1 0 1 1 1 83.06165 87.72769 83.83584
0 1 1 0 1 0 82.57429 85.68499 82.94929
0 1 1 0 1 1 83.44977 88.11581 84.22396
0 1 1 1 1 0 83.89083 88.55687 84.66502
0 1 1 1 1 1 84.79458 91.01598 86.12792
0 1 2 0 1 0 83.06013 87.72618 83.83433
0 1 2 0 1 1 83.79587 90.01727 85.12921
0 1 2 1 1 0 84.32594 90.54733 85.65927
0 1 3 0 1 0 84.78325 91.00465 86.11659
1 1 0 0 1 0 82.45053 85.56123 82.82553
1 1 0 0 1 1 83.39813 88.06418 84.17233
1 1 0 1 1 0 83.83987 88.50591 84.61406
1 1 0 1 1 1 84.66777 90.88916 86.00111
1 1 1 0 1 0 82.89926 87.56530 83.67345
1 1 1 0 1 1 83.75634 89.97773 85.08967
1 1 1 1 1 0 84.30468 90.52607 85.63802
1 1 2 0 1 0 84.49396 90.71535 85.82729
2 1 0 0 1 0 82.76803 87.43407 83.54222
2 1 0 0 1 1 83.56090 89.78229 84.89423
2 1 0 1 1 0 84.08517 90.30657 85.41851
2 1 1 0 1 0 84.41491 90.63631 85.74825
3 1 0 0 1 0 84.50528 90.72667 85.83861
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
# Row of the order-search table with the smallest AIC
output[which.min(output$AIC),]
  p d q P D Q      AIC      BIC     AICc
1 0 1 0 0 1 0 80.86001 82.41536 80.98122
Code
# Row of the order-search table with the smallest BIC
output[which.min(output$BIC),]
  p d q P D Q      AIC      BIC     AICc
1 0 1 0 0 1 0 80.86001 82.41536 80.98122
Code
# Row of the order-search table with the smallest AICc
output[which.min(output$AICc),] 
  p d q P D Q      AIC      BIC     AICc
1 0 1 0 0 1 0 80.86001 82.41536 80.98122

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,0)x(0,1,0)[12].

Code
# Grid-search SARIMA candidate orders on the demo series; the table below
# shows p,q in 0-3 and P,Q in 0-1 with AIC/BIC/AICc for each fit.
output=SARIMA.c(p1=1,p2=4,q1=1,q2=4,P1=1,P2=2,Q1=1,Q2=2,data=demo_ts)

# Render the search results as a table
knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 -129.3895 -127.8059 -129.2718
0 1 0 0 1 1 -134.2483 -131.0813 -133.8847
0 1 0 1 1 0 -135.0908 -131.9238 -134.7272
0 1 0 1 1 1 -133.1372 -128.3867 -132.3872
0 1 1 0 1 0 -144.8954 -141.7283 -144.5317
0 1 1 0 1 1 -150.0106 -145.2600 -149.2606
0 1 1 1 1 0 -150.8358 -146.0853 -150.0858
0 1 1 1 1 1 -148.8645 -142.5304 -147.5741
0 1 2 0 1 0 -143.2911 -138.5405 -142.5411
0 1 2 0 1 1 -148.1194 -141.7853 -146.8291
0 1 2 1 1 0 -148.8999 -142.5658 -147.6096
0 1 3 0 1 0 -141.6947 -135.3606 -140.4043
1 1 0 0 1 0 -135.8274 -132.6603 -135.4637
1 1 0 0 1 1 -145.4978 -140.7472 -144.7478
1 1 0 1 1 0 -148.0413 -143.2908 -147.2913
1 1 0 1 1 1 -146.0553 -139.7212 -144.7649
1 1 1 0 1 0 -143.3479 -138.5974 -142.5979
1 1 1 0 1 1 -148.1797 -141.8456 -146.8894
1 1 1 1 1 0 -148.9446 -142.6105 -147.6543
1 1 2 0 1 0 -141.3706 -135.0366 -140.0803
2 1 0 0 1 0 -134.8497 -130.0992 -134.0997
2 1 0 0 1 1 -144.0340 -137.7000 -142.7437
2 1 0 1 1 0 -146.6904 -140.3563 -145.4001
2 1 1 0 1 0 -141.4355 -135.1014 -140.1452
3 1 0 0 1 0 -135.3470 -129.0129 -134.0567
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
# Row of the order-search table with the smallest AIC
output[which.min(output$AIC),]
  p d q P D Q       AIC       BIC      AICc
7 0 1 1 1 1 0 -150.8358 -146.0853 -150.0858
Code
# Row of the order-search table with the smallest BIC
output[which.min(output$BIC),]
  p d q P D Q       AIC       BIC      AICc
7 0 1 1 1 1 0 -150.8358 -146.0853 -150.0858
Code
# Row of the order-search table with the smallest AICc
output[which.min(output$AICc),] 
  p d q P D Q       AIC       BIC      AICc
7 0 1 1 1 1 0 -150.8358 -146.0853 -150.0858

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,1)x(1,1,0)[12].

Code
# Grid-search SARIMA candidate orders on the inde series; the table below
# shows p,q in 0-3 and P,Q in 0-1 with AIC/BIC/AICc for each fit.
output=SARIMA.c(p1=1,p2=4,q1=1,q2=4,P1=1,P2=2,Q1=1,Q2=2,data=inde_ts)

# Render the search results as a table
knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 -76.67213 -75.08861 -76.55449
0 1 0 0 1 1 -86.88426 -83.71722 -86.52063
0 1 0 1 1 0 -82.45823 -79.29120 -82.09460
0 1 0 1 1 1 -84.93352 -80.18297 -84.18352
0 1 1 0 1 0 -95.56052 -92.39348 -95.19688
0 1 1 0 1 1 -103.76691 -99.01636 -103.01691
0 1 1 1 1 0 -100.32623 -95.57567 -99.57623
0 1 1 1 1 1 -101.77080 -95.43673 -100.48048
0 1 2 0 1 0 -93.58167 -88.83111 -92.83167
0 1 2 0 1 1 -101.87717 -95.54309 -100.58685
0 1 2 1 1 0 -98.36084 -92.02676 -97.07052
0 1 3 0 1 0 -93.12094 -86.78686 -91.83062
1 1 0 0 1 0 -88.70640 -85.53936 -88.34277
1 1 0 0 1 1 -98.25774 -93.50719 -97.50774
1 1 0 1 1 0 -95.57919 -90.82863 -94.82919
1 1 0 1 1 1 -96.32103 -89.98695 -95.03071
1 1 1 0 1 0 -93.59064 -88.84009 -92.84064
1 1 1 0 1 1 -101.91998 -95.58591 -100.62966
1 1 1 1 1 0 -98.37787 -92.04379 -97.08755
1 1 2 0 1 0 -91.99172 -85.65764 -90.70139
2 1 0 0 1 0 -86.92499 -82.17443 -86.17499
2 1 0 0 1 1 -96.45215 -90.11807 -95.16182
2 1 0 1 1 0 -93.76546 -87.43138 -92.47513
2 1 1 0 1 0 -92.77531 -86.44123 -91.48498
3 1 0 0 1 0 -87.23273 -80.89865 -85.94241
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
# Row of the order-search table with the smallest AIC
output[which.min(output$AIC),]
  p d q P D Q       AIC       BIC      AICc
6 0 1 1 0 1 1 -103.7669 -99.01636 -103.0169
Code
# Row of the order-search table with the smallest BIC
output[which.min(output$BIC),]
  p d q P D Q       AIC       BIC      AICc
6 0 1 1 0 1 1 -103.7669 -99.01636 -103.0169
Code
# Row of the order-search table with the smallest AICc
output[which.min(output$AICc),] 
  p d q P D Q       AIC       BIC      AICc
6 0 1 1 0 1 1 -103.7669 -99.01636 -103.0169

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,1)x(0,1,1)[12].

Code
# Grid-search SARIMA candidate orders on the rep series; the table below
# shows p,q in 0-3 and P,Q in 0-1 with AIC/BIC/AICc for each fit.
output=SARIMA.c(p1=1,p2=4,q1=1,q2=4,P1=1,P2=2,Q1=1,Q2=2,data=rep_ts)

# Render the search results as a table
knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 -140.5431 -138.9596 -140.4254
0 1 0 0 1 1 -147.6760 -144.5090 -147.3124
0 1 0 1 1 0 -143.3595 -140.1924 -142.9958
0 1 0 1 1 1 -145.7567 -141.0062 -145.0067
0 1 1 0 1 0 -140.9421 -137.7750 -140.5784
0 1 1 0 1 1 -148.9078 -144.1573 -148.1578
0 1 1 1 1 0 -142.2549 -137.5043 -141.5049
0 1 1 1 1 1 -147.0896 -140.7555 -145.7992
0 1 2 0 1 0 -146.7962 -142.0457 -146.0462
0 1 2 0 1 1 -152.7731 -146.4390 -151.4828
0 1 2 1 1 0 -149.2569 -142.9228 -147.9665
0 1 3 0 1 0 -144.8497 -138.5156 -143.5593
1 1 0 0 1 0 -138.7525 -135.5855 -138.3889
1 1 0 0 1 1 -146.5084 -141.7579 -145.7584
1 1 0 1 1 0 -141.5539 -136.8034 -140.8039
1 1 0 1 1 1 -144.7059 -138.3718 -143.4155
1 1 1 0 1 0 -144.3857 -139.6351 -143.6357
1 1 1 0 1 1 -152.0404 -145.7063 -150.7500
1 1 1 1 1 0 -146.9071 -140.5730 -145.6167
1 1 2 0 1 0 -144.8238 -138.4898 -143.5335
2 1 0 0 1 0 -141.9897 -137.2391 -141.2397
2 1 0 0 1 1 -146.8787 -140.5447 -145.5884
2 1 0 1 1 0 -143.3547 -137.0206 -142.0644
2 1 1 0 1 0 -145.9783 -139.6443 -144.6880
3 1 0 0 1 0 -141.9418 -135.6077 -140.6514
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Code
# Row of the order-search table with the smallest AIC
output[which.min(output$AIC),]
   p d q P D Q       AIC      BIC      AICc
10 0 1 2 0 1 1 -152.7731 -146.439 -151.4828
Code
# Row of the order-search table with the smallest BIC
output[which.min(output$BIC),]
   p d q P D Q       AIC      BIC      AICc
10 0 1 2 0 1 1 -152.7731 -146.439 -151.4828
Code
# Row of the order-search table with the smallest AICc
output[which.min(output$AICc),] 
   p d q P D Q       AIC      BIC      AICc
10 0 1 2 0 1 1 -152.7731 -146.439 -151.4828

The model with the lowest AIC, BIC and AICc is ARIMA(0,1,2)x(0,1,1)[12].

12. Fitting Best SARIMA(p,d,q) & Diagnostics

Because the seasonal lag would exceed the number of available observations in some of the time series, we adjusted those SARIMA models to use a seasonal period of 4 instead of the initially intended 12. This adaptation keeps the models estimable while still capturing the temporal dependencies in the data, albeit with a shorter seasonal cycle. The modification may slightly reduce the models' predictive capacity, but it allows us to derive meaningful insights and forecasts despite the limited sample size.

Code
set.seed(123)
# Fit SARIMA(0,1,1)x(0,1,0)[4] to the twice-differenced daily-vaccination
# series; sarima() also renders the residual diagnostic panels. The printed
# summary is captured so selected lines can be echoed below.
model_output <- capture.output(sarima(diff(diff(d_vacc_ts)), 0,1,1,0,1,0,4))

Code
# Echo the coefficient table and information criteria from the captured
# print-out. NOTE(review): hard-coded line indices are fragile if the astsa
# print layout changes — verify after package upgrades.
cat(model_output[35:45], model_output[length(model_output)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
    Estimate     SE t.value p.value
ma1  -0.9153 0.2663 -3.4378  0.0023

sigma^2 estimated as 8164902333 on 22 degrees of freedom 
 
AIC = 25.91335  AICc = 25.92164  BIC = 26.01209 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: all coefficients are significant.

The equation for the model is: \[x_t = w_t -0.9153w_{t-1} \]

Code
set.seed(123)
# Fit SARIMA(1,1,2)x(0,1,0)[4] to people vaccinated per hundred; sarima()
# also renders the residual diagnostic panels. The printed summary is
# captured so selected lines can be echoed below.
model_output <- capture.output(sarima(p_vacc_ts, 1,1,2,0,1,0,4))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[14:26], model_output[length(model_output)], sep = "\n") 
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
    Estimate     SE t.value p.value
ar1   0.4289 0.2617  1.6388  0.1161
ma1   0.9434 0.2609  3.6161  0.0016
ma2   0.3449 0.3130  1.1018  0.2830

sigma^2 estimated as 2.270531 on 21 degrees of freedom 
 
AIC = 4.070457  AICc = 4.120457  BIC = 4.266799 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: only ma1 is significant.

The equation for the model is: \[x_t = 0.4289x_{t-1} + w_t + 0.9434w_{t-1} + 0.3449w_{t-2}\]

Code
set.seed(123)
# Fit SARIMA(2,1,0)x(0,1,0)[4] to people fully vaccinated per hundred;
# sarima() also renders the residual diagnostic panels. The printed summary
# is captured so selected lines can be echoed below.
model_output <- capture.output(sarima(pf_vacc_ts, 2,1,0,0,1,0,4))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[19:30], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
    Estimate     SE t.value p.value
ar1   1.4015 0.1538  9.1117       0
ar2  -0.8044 0.1419 -5.6687       0

sigma^2 estimated as 2.090858 on 22 degrees of freedom 
 
AIC = 3.950764  AICc = 3.974574  BIC = 4.098021 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: all coefficients are significant.

The equation for the model is: \[x_t = 1.4015x_{t-1} - 0.8044x_{t-2} + w_t\]

Code
set.seed(123)
# Fit SARIMA(0,1,2)x(0,1,1)[12] to the death-case series; sarima() also
# renders the residual diagnostic panels. The printed summary is captured
# so selected lines can be echoed below.
model_output <- capture.output(sarima(dead_ts, 0,1,2,0,1,1,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[36:48], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
     Estimate     SE t.value p.value
ma1    1.8515 0.2207  8.3910  0.0000
ma2    0.9997 0.2351  4.2512  0.0002
sma1  -0.9977 0.5750 -1.7352  0.0941

sigma^2 estimated as 85591993 on 27 degrees of freedom 
 
AIC = 22.10196  AICc = 22.13273  BIC = 22.28878 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: ma1 and ma2 are significant.

The equation for the model is: \[x_t = (1 + 1.8515B + 0.9997B^2)(1 - 0.9977B^{12})w_t\]

Code
set.seed(123)
# Fit SARIMA(3,1,0)x(0,1,0)[12] to the differenced inpatient-bed series;
# sarima() also renders the residual diagnostic panels. The printed summary
# is captured so selected lines can be echoed below.
model_output <- capture.output(sarima(diff(hos_ts1), 3,1,0,0,1,0,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[32:44], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
    Estimate     SE  t.value p.value
ar1  -0.4824 0.1197  -4.0287  0.0003
ar2  -0.4614 0.1364  -3.3828  0.0019
ar3  -0.8923 0.0828 -10.7763  0.0000

sigma^2 estimated as 2912500402 on 32 degrees of freedom 
 
AIC = 24.99912  AICc = 25.02124  BIC = 25.17687 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: all coefficients are significant.

The equation for the model is: \[x_t = -0.4824x_{t-1} - 0.4614x_{t-2} - 0.8923x_{t-3} + w_t\]

Code
set.seed(123)
# Fit SARIMA(0,1,2)x(0,1,1)[12] to the differenced COVID inpatient-bed
# series; sarima() also renders the residual diagnostic panels. The printed
# summary is captured so selected lines can be echoed below.
model_output <- capture.output(sarima(diff(hos_ts2), 0,1,2,0,1,1,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[22:34], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
     Estimate     SE t.value p.value
ma1   -0.5988 0.2210 -2.7100  0.0107
ma2   -0.4011 0.1836 -2.1847  0.0363
sma1  -0.9994 0.3640 -2.7454  0.0098

sigma^2 estimated as 416745453 on 32 degrees of freedom 
 
AIC = 23.49261  AICc = 23.51473  BIC = 23.67037 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: all coefficients are significant.

The equation for the model is: \[x_t = (1 - 0.5988B - 0.4011B^2)(1 - 0.9994B^{12})w_t\]

Code
set.seed(123)
# Fit SARIMA(0,1,2)x(0,1,1)[12] to the differenced bed-utilization-rate
# series; sarima() also renders the residual diagnostic panels. The printed
# summary is captured so selected lines can be echoed below.
model_output <- capture.output(sarima(diff(hos_ts3), 0,1,2,0,1,1,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[22:34], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
     Estimate     SE t.value p.value
ma1   -0.4717 0.2009 -2.3476  0.0252
ma2   -0.5283 0.1702 -3.1046  0.0040
sma1  -0.9997 0.3681 -2.7156  0.0106

sigma^2 estimated as 0.0005031713 on 32 degrees of freedom 
 
AIC = -3.950136  AICc = -3.928016  BIC = -3.772382 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: all coefficients are significant.

The equation for the model is: \[x_t = (1 - 0.4717B - 0.5283B^2)(1 - 0.9997B^{12})w_t\]

Code
set.seed(123)
# Fit SARIMA(0,1,0)x(0,1,0)[12] to the stock series (a pure differencing
# model with no coefficients); sarima() also renders the residual diagnostic
# panels. The printed summary is captured so selected lines can be echoed.
model_output <- capture.output(sarima(stock_ts, 0,1,0,0,1,0,12))

Code
# Echo the (empty) coefficient table and information criteria from the
# captured print-out (hard-coded line indices — fragile across versions).
cat(model_output[1:9], model_output[length(model_output)], sep = "\n")
<><><><><><><><><><><><><><>
 
Coefficients: 
     Estimate p.value

sigma^2 estimated as 0.5572542 on 35 degrees of freedom 
 
AIC = 2.310286  AICc = 2.310286  BIC = 2.354725 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

Code
set.seed(123)
# Fit SARIMA(0,1,1)x(1,1,0)[12] to the demo series; sarima() also renders
# the residual diagnostic panels. The printed summary is captured so
# selected lines can be echoed below.
model_output <- capture.output(sarima(demo_ts, 0,1,1,1,1,0,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[23:34], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
     Estimate     SE t.value p.value
ma1   -0.8685 0.1540 -5.6389  0.0000
sar1  -0.5087 0.1495 -3.4021  0.0017

sigma^2 estimated as 0.0006502313 on 34 degrees of freedom 
 
AIC = -4.189884  AICc = -4.179783  BIC = -4.057924 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

The equation for the model is: \[(1 + 0.5087B^{12})x_t = (1 - 0.8685B)w_t\]

$ttable: all coefficients are significant.

Code
set.seed(123)
# Fit SARIMA(0,1,1)x(0,1,1)[12] to the inde series; sarima() also renders
# the residual diagnostic panels. The printed summary is captured so
# selected lines can be echoed below.
model_output <- capture.output(sarima(inde_ts, 0,1,1,0,1,1,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[28:39], model_output[length(model_output)], sep = "\n")
converged
<><><><><><><><><><><><><><>
 
Coefficients: 
     Estimate     SE t.value p.value
ma1   -0.9999 0.2161 -4.6278  0.0001
sma1  -0.9998 0.6468 -1.5458  0.1314

sigma^2 estimated as 0.001547766 on 34 degrees of freedom 
 
AIC = -2.882414  AICc = -2.872313  BIC = -2.750454 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: only ma1 is significant.

The equation for the model is: \[x_t = (1 - 0.9999B)(1 - 0.9998B^{12})w_t\]

Code
set.seed(123)
# Fit SARIMA(0,1,2)x(0,1,1)[12] to the rep series; sarima() also renders
# the residual diagnostic panels. The printed summary is captured so
# selected lines can be echoed below.
model_output <- capture.output(sarima(rep_ts, 0,1,2,0,1,1,12))

Code
# Echo the coefficient table and information criteria from the captured
# print-out (hard-coded line indices — fragile across astsa versions).
cat(model_output[25:34], model_output[length(model_output)], sep = "\n")
Coefficients: 
     Estimate     SE t.value p.value
ma1   -0.4338 0.1777 -2.4408  0.0202
ma2   -0.4032 0.1828 -2.2057  0.0345
sma1  -0.9999 0.5380 -1.8587  0.0720

sigma^2 estimated as 0.0004077668 on 33 degrees of freedom 
 
AIC = -4.243697  AICc = -4.222864  BIC = -4.06775 
 
 

The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.

The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.

The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.

The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.

$ttable: ma1 and ma2 are significant; sma1 (p = 0.0720) is not significant at the 5% level.

The equation for the model is: \[x_t = (1 - 0.4338B - 0.4032B^2)(1 - 0.9999B^{12})w_t\]

13. Forecasting

Code
# Refit the selected SARIMA(0,1,1)x(0,1,0) model on the twice-differenced
# daily-vaccination series. NOTE(review): `time` masks base::time(); it is
# reassigned before each subsequent use, so this is cosmetic only.
time <- Arima(diff(diff(d_vacc_ts)), order = c(0, 1, 1), seasonal = c(0, 1, 0))

# Forecast the next three years (36 monthly steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(1,1,2)x(0,1,0) model on people vaccinated per
# hundred. NOTE(review): `time` masks base::time().
time <- Arima(p_vacc_ts, order = c(1, 1, 2), seasonal = c(0, 1, 0))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(2,1,0)x(0,1,0) model on people fully vaccinated
# per hundred. NOTE(review): `time` masks base::time().
time <- Arima(pf_vacc_ts, order = c(2, 1, 0), seasonal = c(0, 1, 0))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,2)x(0,1,1) model on the death-case series.
# NOTE(review): `time` masks base::time().
time <- Arima(dead_ts, order = c(0, 1, 2), seasonal = c(0, 1, 1))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(3,1,0)x(0,1,0) model on the differenced
# inpatient-bed series. NOTE(review): `time` masks base::time().
time <- Arima(diff(hos_ts1), order = c(3, 1, 0), seasonal = c(0, 1, 0))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,2)x(0,1,1) model on the differenced COVID
# inpatient-bed series. NOTE(review): `time` masks base::time().
time <- Arima(diff(hos_ts2), order = c(0, 1, 2), seasonal = c(0, 1, 1))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,2)x(0,1,1) model on the differenced
# bed-utilization-rate series. NOTE(review): `time` masks base::time().
time <- Arima(diff(hos_ts3), order = c(0, 1, 2), seasonal = c(0, 1, 1))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,0)x(0,1,0) (pure differencing) model on the
# stock series. NOTE(review): `time` masks base::time().
time <- Arima(stock_ts, order = c(0, 1, 0), seasonal = c(0, 1, 0))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,1)x(1,1,0) model on the demo series.
# NOTE(review): `time` masks base::time().
time <- Arima(demo_ts, order = c(0, 1, 1), seasonal = c(1, 1, 0))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,1)x(0,1,1) model on the inde series.
# NOTE(review): `time` masks base::time().
time <- Arima(inde_ts, order = c(0, 1, 1), seasonal = c(0, 1, 1))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

Code
# Refit the selected SARIMA(0,1,2)x(0,1,1) model on the rep series.
# NOTE(review): `time` masks base::time().
time <- Arima(rep_ts, order = c(0, 1, 2), seasonal = c(0, 1, 1))

# Forecast the next three years (36 steps) and plot
fc <- forecast(time, h = 36)
autoplot(fc) +
  theme_bw() +
  theme(plot.background = element_rect(fill = "#D9E3F1", color = NA),
        panel.background = element_rect(fill = "#D9E3F1", color = NA))

14. SARIMA vs. Benchmarks

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast the fitted model itself. The original piped the raw series
# into forecast(), which dispatches to forecast.ts and fits an ETS model,
# so the layer labeled "Arima" never used fit_techmu_bench.
fit_techmu_bench <- Arima(d_vacc_ts, order=c(2, 1, 2),include.drift = FALSE) 
autoplot(d_vacc_ts) +
  autolayer(meanf(d_vacc_ts, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(d_vacc_ts, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(d_vacc_ts, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with Daily Vaccinations")+
  guides(colour=guide_legend(title="Forecast")) 

The plot is a time series chart comparing different forecasting methods against the actual data. The black line shows daily vaccinations up to the end of the observed period. The colored lines represent forecasts from the ARIMA, Drift, Mean, and Naïve methods, projected beyond the actual data into future dates. Each method predicts a different outcome for future vaccination numbers: the ARIMA model suggests a continued steady rate, while the other methods predict various levels of change or stability.

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast fit_techmu_bench directly; forecast(<ts>) fits an ETS model,
# so the original "Arima" layer never used the fitted ARIMA.
fit_techmu_bench <- Arima(p_vacc_ts, order=c(0, 1, 3),include.drift = FALSE) 
autoplot(p_vacc_ts) +
  autolayer(meanf(p_vacc_ts, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(p_vacc_ts, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(p_vacc_ts, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with People Vaccinated")+
  guides(colour=guide_legend(title="Forecast")) 

This plot appears to be comparing the forecasted number of people vaccinated using different benchmark methods against actual historical data. The black line represents the historical data of people vaccinated over time. The different colored lines at the end of the historical data represent forecasts made by different models for future vaccinations: Arima, Drift, Mean, and Naïve. The Arima model shows a sharp upward trend, suggesting a significant increase in vaccinations, while the other models forecast a relatively steady or only slightly increasing trend.

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast fit_techmu_bench directly; forecast(<ts>) fits an ETS model,
# so the original "Arima" layer never used the fitted ARIMA.
fit_techmu_bench <- Arima(pf_vacc_ts, order=c(0, 1, 3),include.drift = FALSE) 
autoplot(pf_vacc_ts) +
  autolayer(meanf(pf_vacc_ts, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(pf_vacc_ts, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(pf_vacc_ts, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with People Fully Vaccinated")+
  guides(colour=guide_legend(title="Forecast")) 

The plot appears to be a time series graph that shows data on the number of people fully vaccinated over time. The black line represents the actual historical data, showing a steady increase in the number of fully vaccinated individuals over time. The colored lines represent different forecasting methods projected into the future (2024 and beyond), such as ARIMA (Autoregressive Integrated Moving Average), Drift, Mean, and Naïve forecasting. Each method provides a different projection, indicating varying expectations about future vaccination trends based on past data.

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast fit_techmu_bench directly; forecast(<ts>) fits an ETS model,
# so the original "Arima" layer never used the fitted ARIMA.
fit_techmu_bench <- Arima(case_ts, order=c(0, 1, 2),include.drift = FALSE) 
autoplot(case_ts) +
  autolayer(meanf(case_ts, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(case_ts, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(case_ts, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with Newly Confirmed Case")+
  guides(colour=guide_legend(title="Forecast")) 

The plot appears to show a comparison of different forecasting methods for a time series data set related to newly confirmed cases of a condition, likely COVID-19, given the context. The black line represents the actual historical data, while the various colored lines project into the future with forecasts from different methods. The ARIMA forecast (red) predicts an increase, while the Drift (green), Mean (blue), and Naïve (purple) methods predict a flat or slightly varied continuation of the most recent data. This type of visualization is used to compare the predictive performance of different statistical or machine learning models.

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast fit_techmu_bench directly; forecast(<ts>) fits an ETS model,
# so the original "Arima" layer never used the fitted ARIMA.
fit_techmu_bench <- Arima(dead_ts, order=c(0, 1, 6),include.drift = FALSE) 
autoplot(dead_ts) +
  autolayer(meanf(dead_ts, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(dead_ts, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(dead_ts, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with Death Case")+
  guides(colour=guide_legend(title="Forecast")) 

The plot is a comparison of forecast methods for death cases over time, with historical data shown by the black line increasing from 2020 through 2023. On the y-axis, death cases are plotted on a logarithmic scale, allowing for a wide range of values. Past 2023, four different forecast methods are shown: ARIMA (red line), suggesting a continuous increase; Drift (green line), indicating a more moderate increase; Mean (blue line), projecting a flat trend indicating no change from the last actual data point; and Naive (purple line), also predicting no change moving forward. These forecasts are meant to predict future values based on the historical trend and their respective statistical assumptions. The plot serves as a visual assessment tool to compare how each method anticipates the future based on the given data.

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast fit_techmu_bench directly; forecast(<ts>) fits an ETS model,
# so the original "Arima" layer never used the fitted ARIMA.
fit_techmu_bench <- Arima(hos_ts1, order=c(0, 1, 1),include.drift = FALSE) 
autoplot(hos_ts1) +
  autolayer(meanf(hos_ts1, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(hos_ts1, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(hos_ts1, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with Inpatient Bed Number")+
  guides(colour=guide_legend(title="Forecast")) 

This graph illustrates the comparison of various forecasting methods applied to the number of inpatient beds over time. The historical data, plotted as a black line, shows a sharp increase in the number of beds around 2020, followed by a stabilization around 6e+05 (600,000 beds). Post-2023, the graph features predictions from different statistical models: ARIMA (red line) predicts a significant increase in bed numbers; Drift (green line) forecasts a slight upward trend; Mean (blue line) suggests the number will remain constant, equal to the historical average; and Naive (purple line) assumes no change, extending the last data point forward. The plot serves as a tool to visualize and evaluate how these models anticipate changes in hospital bed availability, with each model’s forecast based on its specific methodological approach to the existing data.

Code
# Compare benchmark forecasts (mean, naïve, drift) against the fitted ARIMA.
# Fix: forecast fit_techmu_bench directly; forecast(<ts>) fits an ETS model,
# so the original "Arima" layer never used the fitted ARIMA.
fit_techmu_bench <- Arima(hos_ts2, order=c(2, 1, 1),include.drift = FALSE) 
autoplot(hos_ts2) +
  autolayer(meanf(hos_ts2, h=10),
            series="Mean", PI=FALSE) +
  autolayer(naive(hos_ts2, h=10),
            series="Naïve", PI=FALSE)+
  autolayer(rwf(hos_ts2, drift=TRUE, h=10),
            series="Drift", PI=FALSE)+
   autolayer(forecast(fit_techmu_bench, h=10), 
            series="Arima",PI=FALSE) + theme_bw() + ggtitle("Benchmark Methods Comparison with Inpatient Bed Number Used for COVID")+
  guides(colour=guide_legend(title="Forecast")) 

The plot depicts the number of inpatient hospital beds used for COVID-19 over time, showing a volatile history with several peaks, particularly notable in 2020 and 2021. The time series data, illustrated by the black line, exhibits sharp increases and decreases, suggesting waves or surges in hospital bed usage due to the pandemic. Looking into the future beyond the historical data, various forecasting methods have been applied, shown by the horizontal lines after 2023: ARIMA (red) forecasts a slight increase, Drift (green) indicates stability with a very mild upward trend, Mean (blue) predicts a constant number equal to the historical average, and Naive (purple) extends the last observed data point forward, assuming no change. These forecasts provide a range of potential future scenarios for hospital bed usage, reflecting the differing assumptions and calculations inherent to each forecasting model.

Code
# Fit the selected ARIMA(2,0,0) benchmark model for the utilization-rate series.
fit_techmu_bench <- Arima(hos_ts3, order = c(2, 0, 0), include.drift = FALSE)
# Overlay four 10-step-ahead benchmark forecasts on the observed series.
autoplot(hos_ts3) +
  autolayer(meanf(hos_ts3, h = 10),
            series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts3, h = 10),
            series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts3, drift = TRUE, h = 10),
            series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model. forecast(hos_ts3, 10) ignored
  # fit_techmu_bench and used the default (ETS) method.
  autolayer(forecast(fit_techmu_bench, h = 10),
            series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Utilization Rate for Inpatient Bed Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))

This plot compares different forecasting methods for the utilization rate of inpatient beds used for COVID-19, as indicated by the historical data (black line) from 2020 to 2023. The y-axis represents the utilization rate, which shows significant fluctuations, peaking notably at various points likely corresponding to waves of COVID-19 cases. The forecast methods, represented by colored lines beyond 2023, provide different projections: ARIMA (red line) shows a slight increasing trend, Drift (green line) indicates a marginal increase, Mean (blue line) suggests a flat trend at the historical average, and Naive (purple line) extends the last observed data point, assuming the rate will remain unchanged. These models are used to predict future bed utilization, offering a visual tool for comparing the potential accuracy and assumptions of each method against future real-world data.

Code
# Fit the selected ARIMA(1,0,1) benchmark model for the unemployment-rate series.
fit_techmu_bench <- Arima(unemploy_ts, order = c(1, 0, 1), include.drift = FALSE)
# Overlay four 10-step-ahead benchmark forecasts on the observed series.
autoplot(unemploy_ts) +
  autolayer(meanf(unemploy_ts, h = 10),
            series = "Mean", PI = FALSE) +
  autolayer(naive(unemploy_ts, h = 10),
            series = "Naïve", PI = FALSE) +
  autolayer(rwf(unemploy_ts, drift = TRUE, h = 10),
            series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model. forecast(unemploy_ts, 10) ignored
  # fit_techmu_bench and used the default (ETS) method.
  autolayer(forecast(fit_techmu_bench, h = 10),
            series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Unemployment Rate") +
  guides(colour = guide_legend(title = "Forecast"))

This plot displays the historical trend and forecasts of the unemployment rate from 2020 to beyond 2022. The black line represents the actual historical unemployment rate, which shows a sharp spike in 2020, followed by a general decline over the subsequent years. Looking to the future, the forecasts made by different models are represented by the horizontal lines: ARIMA (red line) predicts a downward trend, continuing the decline of the unemployment rate; Drift (green line) suggests a stable rate, maintaining the last observed rate; Mean (blue line) forecasts that the unemployment rate will average out to a steady state, ignoring the downward trend; and Naive (purple line) projects no change, extending the last observed point into the future. The plot serves to compare how these forecasting methods project the future of unemployment rates based on the past data and their respective algorithmic interpretations.

Code
# Fit the selected ARIMA(1,0,3) benchmark model for the Pfizer stock-price series.
fit_techmu_bench <- Arima(stock_ts, order = c(1, 0, 3), include.drift = FALSE)
# Overlay four 10-step-ahead benchmark forecasts on the observed series.
autoplot(stock_ts) +
  autolayer(meanf(stock_ts, h = 10),
            series = "Mean", PI = FALSE) +
  autolayer(naive(stock_ts, h = 10),
            series = "Naïve", PI = FALSE) +
  autolayer(rwf(stock_ts, drift = TRUE, h = 10),
            series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model. forecast(stock_ts, 10) ignored
  # fit_techmu_bench and used the default (ETS) method.
  autolayer(forecast(fit_techmu_bench, h = 10),
            series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Pfizer Stock Price") +
  guides(colour = guide_legend(title = "Forecast"))

The plot shows the historical performance and future forecast of Pfizer’s stock price. The black line represents the stock price from 2020 through part of 2023, with the price experiencing volatility and an overall downward trend. Projected forecasts beyond the historical data are made using four methods: ARIMA (red line) predicts a continuing decline; Drift (green line) forecasts a slight increase; Mean (blue line) suggests the stock price will level off to the average of the historical data; and Naive (purple line) assumes the stock price will remain constant at the last observed value. These different forecasts highlight the variability in predicting stock prices depending on the modeling technique used, with each forecast method taking a unique approach to extrapolate future prices from the past data.

Code
# Fit the selected ARIMA(0,0,1) benchmark model for the Democratic support-rate series.
fit_techmu_bench <- Arima(demo_ts, order = c(0, 0, 1), include.drift = FALSE)
# Overlay four 10-step-ahead benchmark forecasts on the observed series.
autoplot(demo_ts) +
  autolayer(meanf(demo_ts, h = 10),
            series = "Mean", PI = FALSE) +
  autolayer(naive(demo_ts, h = 10),
            series = "Naïve", PI = FALSE) +
  autolayer(rwf(demo_ts, drift = TRUE, h = 10),
            series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model. forecast(demo_ts, 10) ignored
  # fit_techmu_bench and used the default (ETS) method.
  autolayer(forecast(fit_techmu_bench, h = 10),
            series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Democratic") +
  guides(colour = guide_legend(title = "Forecast"))

The graph illustrates the historical support rate for the Democratic Party (presumably in the United States) from 2020 to the latter part of 2023, along with projected forecasts using various methods. The support rate, shown by the black line, fluctuates over time with notable volatility but remains within a band between approximately 0.86 and 0.92. The future forecasts, indicated by the lines extending from the end of 2023 to 2025, predict the support rate using different statistical models: ARIMA (red) forecasts a stable support rate continuing from the last observed point; Drift (green) also suggests a stable but slightly declining trend; Mean (blue) projects that the support rate will level off to the historical mean, and Naive (purple) predicts no change, carrying the last observed support rate forward. These projections provide a range of scenarios for future party support, each based on different assumptions about the patterns in the historical data.

Code
# Fit the selected ARIMA(0,0,1) benchmark model for the Independent support-rate series.
fit_techmu_bench <- Arima(inde_ts, order = c(0, 0, 1), include.drift = FALSE)
# Overlay four 10-step-ahead benchmark forecasts on the observed series.
autoplot(inde_ts) +
  autolayer(meanf(inde_ts, h = 10),
            series = "Mean", PI = FALSE) +
  autolayer(naive(inde_ts, h = 10),
            series = "Naïve", PI = FALSE) +
  autolayer(rwf(inde_ts, drift = TRUE, h = 10),
            series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model. forecast(inde_ts, 10) ignored
  # fit_techmu_bench and used the default (ETS) method.
  autolayer(forecast(fit_techmu_bench, h = 10),
            series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Independent") +
  guides(colour = guide_legend(title = "Forecast"))

The plot displays the fluctuating support rate for Independents, presumably in a political context, from 2020 through 2023, and forecasts for this rate into 2025 using different statistical methods. The black line shows actual historical data, indicating that support for Independents has varied, with rates moving between just below 0.25 and around 0.35. Post-2023, the forecast lines suggest different future trends: the ARIMA model (red) predicts a very slight decline, the Drift method (green) suggests a constant rate with a small upward tendency, the Mean (blue) indicates a flat forecast at the historical average, and the Naive approach (purple) projects the rate will remain unchanged at the last observed point. These projections offer diverse perspectives on potential future support for Independents based on past patterns.

Code
# Fit the selected ARIMA(1,0,1) benchmark model for the Republican support-rate series.
fit_techmu_bench <- Arima(rep_ts, order = c(1, 0, 1), include.drift = FALSE)
# Overlay four 10-step-ahead benchmark forecasts on the observed series.
autoplot(rep_ts) +
  autolayer(meanf(rep_ts, h = 10),
            series = "Mean", PI = FALSE) +
  autolayer(naive(rep_ts, h = 10),
            series = "Naïve", PI = FALSE) +
  autolayer(rwf(rep_ts, drift = TRUE, h = 10),
            series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model. forecast(rep_ts, 10) ignored
  # fit_techmu_bench and used the default (ETS) method.
  autolayer(forecast(fit_techmu_bench, h = 10),
            series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Republican") +
  guides(colour = guide_legend(title = "Forecast"))

This plot presents the support rate for the Republican Party from 2020 through 2023 and includes projections to 2025 based on various forecasting models. The support rate, depicted by the black line, exhibits volatility with values oscillating primarily between 0.05 and 0.1. The forecast section post-2023 features predictions from different models: ARIMA (red line) suggests a decrease in support; Drift (green line) forecasts a small upward trend; Mean (blue line) indicates that support will stabilize at the historical average rate; and Naive (purple line) predicts the support rate will remain constant at the end of the observed data. These differing forecasts provide insights into possible future trends of Republican support, each based on distinct statistical assumptions and calculations.

15. Cross Validation

We performed cross validation to select the best-performing model for the time series with over 40 samples. Since the auto-ARIMA function did not provide us with the best SARIMA model, we compare the performance of the model with the lowest AIC against the model with the second-lowest AIC.

In the death case time series, we select model ARIMA(0,1,2)x(0,1,1)12(lowest AIC) and model ARIMA(0,1,2)x(1,1,0)12(second lowest AIC).

Code
# Rolling-origin cross validation on the death-case series:
# n = length(dead_ts); n - k = 24 held-out observations = 2 folds of 12 months.
k <- 19
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(dead_ts)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(dead_ts, end = st + i - 1)
  xtest <- window(dead_ts, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(0,1,2)(0,1,1)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12
  # columns, making the horizon plot meaningless.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(0,1,2)(1,1,0)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(0, 1, 2),
                seasonal = list(order = c(1, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the lowest AIC actually performs better!

In the inpatient bed time series, we select model ARIMA(3,1,0)x(0,1,0)12(lowest AIC) and model ARIMA(1,1,1)x(0,1,0)12(second lowest AIC).

Code
# Rolling-origin cross validation on the inpatient-bed series:
# n = length(hos_ts1) = 49; n - k = 24 held-out observations = 2 folds of 12 months.
k <- 25
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(hos_ts1)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(hos_ts1, end = st + i - 1)
  xtest <- window(hos_ts1, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(3,1,0)(0,1,0)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(3, 1, 0),
               seasonal = list(order = c(0, 1, 0), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12 columns.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(1,1,1)(0,1,0)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the second lowest AIC actually performs better!

In the inpatient bed used for COVID time series, we select model ARIMA(0,1,2)x(0,1,1)12 (lowest AIC) and model ARIMA(1,1,1)x(0,1,1)12 (second lowest AIC).

Code
# Rolling-origin cross validation on the COVID inpatient-bed series:
# n = length(hos_ts2) = 49; n - k = 24 held-out observations = 2 folds of 12 months.
k <- 25
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(hos_ts2)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(hos_ts2, end = st + i - 1)
  xtest <- window(hos_ts2, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(0,1,2)(0,1,1)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12 columns.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(1,1,1)(0,1,1)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 1), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the lowest AIC actually performs better!

In the utilization rate for inpatient bed used for COVID time series, we select model ARIMA(0,1,2)x(0,1,1)12(lowest AIC) and model ARIMA(0,1,2)x(1,1,0)12(second lowest AIC).

Code
# Rolling-origin cross validation on the COVID bed utilization-rate series:
# n = length(hos_ts3) = 49; n - k = 24 held-out observations = 2 folds of 12 months.
k <- 25
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(hos_ts3)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(hos_ts3, end = st + i - 1)
  xtest <- window(hos_ts3, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(0,1,2)(0,1,1)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12 columns.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(0,1,2)(1,1,0)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(0, 1, 2),
                seasonal = list(order = c(1, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the second lowest AIC actually performs better!

In the support rate for democratic time series, we select model ARIMA(0,1,1)x(1,1,0)12(lowest AIC) and model ARIMA(1,1,1)x(1,1,0)12(second lowest AIC).

Code
# Rolling-origin cross validation on the Democratic support-rate series:
# n = length(demo_ts) = 49; n - k = 24 held-out observations = 2 folds of 12 months.
k <- 25
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(demo_ts)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(demo_ts, end = st + i - 1)
  xtest <- window(demo_ts, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(0,1,1)(1,1,0)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(0, 1, 1),
               seasonal = list(order = c(1, 1, 0), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12 columns.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(1,1,1)(1,1,0)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(1, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the second lowest AIC actually performs better!

In the support rate for independent time series, we select model ARIMA(0,1,1)x(0,1,1)12(lowest AIC) and model ARIMA(1,1,1)x(0,1,1)12(second lowest AIC).

Code
# Rolling-origin cross validation on the Independent support-rate series:
# n = length(inde_ts) = 49; n - k = 24 held-out observations = 2 folds of 12 months.
k <- 25
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(inde_ts)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(inde_ts, end = st + i - 1)
  xtest <- window(inde_ts, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(0,1,1)(0,1,1)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(0, 1, 1),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12 columns.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(1,1,1)(0,1,1)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 1), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the second lowest AIC actually performs better!

In the support rate for republican time series, we select model ARIMA(0,1,2)x(0,1,1)12(lowest AIC) and model ARIMA(1,1,1)x(0,1,1)12(second lowest AIC).

Code
# Rolling-origin cross validation on the Republican support-rate series:
# n = length(rep_ts) = 49; n - k = 24 held-out observations = 2 folds of 12 months.
k <- 25
h <- 12  # forecast horizon evaluated at each fold (one seasonal cycle)
mae1 <- matrix(NA, 2, h)  # fold x horizon MAE, lowest-AIC model
mae2 <- matrix(NA, 2, h)  # fold x horizon MAE, second-lowest-AIC model

st <- tsp(rep_ts)[1] + (k - 1) / 12  # end of the initial training window

for (i in 1:2) {
  # Expanding training window; the next 12 months form the test window.
  xtrain <- window(rep_ts, end = st + i - 1)
  xtest <- window(rep_ts, start = st + (i - 1) + 1 / 12, end = st + i)
  # Candidate 1: ARIMA(0,1,2)(0,1,1)[12] (lowest AIC)
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # BUG FIX: forecast h = 12 steps so each MAE column is a distinct
  # horizon; the original h = 1 forecast was recycled across all 12 columns.
  fcast <- forecast(fit, h = h)
  # Candidate 2: ARIMA(1,1,1)(0,1,1)[12] (second-lowest AIC)
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 1), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = h)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE by horizon for both candidates, with 30% headroom.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)), na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:h, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:h, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft", legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)

We can see the model with the second lowest AIC actually performs better!